RQ5: complete case analysis vs imputation analysis

Load data

docker container used: bignardig/tidyverse451:v7

Code
# Shared setup script; the objects used below (e.g. df, rq5y) are presumably
# defined there -- confirm against 0_load_data.R.
source("0_load_data.R")

# Previously saved results, read from the "results" directory
bootstrap_summary_df_ace <- readRDS(file.path("results", "5_bootstrap_summary_df_ace.Rds"))
boot_compare_results <- readRDS(file.path("results", "5_boot_compare_results.Rds"))
# ace_comparisons      = readRDS(file = file.path("results", "5_ace_comparisons.Rds"))

# Less-cleaned MICE output (useful for getting diagnostics...)
imputed_mice <- readRDS(file.path("results", "5_1_all_imputations.Rds"))

# original_variances   = sapply(rq5y, function(v) sd(df[[v]], na.rm = TRUE)^2)

# Holm-adjust the p-values separately within each `parameter`.
# NOTE(review): the result is left grouped by `parameter`.
bootstrap_summary_df <- readRDS(file.path("results", "5_bootstrap_summary_df.Rds")) %>%
  group_by(parameter) %>%
  mutate(pval_adj = stats::p.adjust(pval, method = "holm"))

Load imputed dataset (combined)

Code
# Cleaned and put-together imputed data
df_rq5_imputed <- readRDS(file.path("data", "df_rq5_imputed.Rds"))

# Sample size: number of rows where .imp equals 1
nrow(filter(df_rq5_imputed, .imp == 1))
[1] 12976
Code
# Comparison dataset: keep only the rq5y columns, after dropping every family
# listed in any of the three exclusion vectors.
original_dataset <- df %>%
  filter(
    !randomfamid %in% exclude_fams_onesib,
    !randomfamid %in% rq5_exclude_fams,
    # Exclude fams with less than 30% data on all imputed data (excluding baseline data)
    !randomfamid %in% rq5_exclude_fams_2
  ) %>%
  select(all_of(rq5y))

# imputed_dataset <- df_rq5_imputed %>%
#   select(all_of(rq5y))

Descriptive stats

Missingness plots

Code
# Correlation plot of mother's education (amohqualn) and the rq5y variables,
# shown with their short display labels.
df %>%
  select(amohqualn, all_of(rq5y)) %>%
  setNames(c("Y1: Mother Education", rq5y_labels_short)) %>%
  # select(rq5y) %>%
  # `colnames<-`(rq5y_labels_short) %>%
  as.data.frame() %>% # Note that this is needed for function to work - could improve?
  gbtoolbox::plot_correlations(sample_size = FALSE, confidence_interval = FALSE)
Warning in gbtoolbox::plot_correlations(., confidence_interval = FALSE, : This function is in development, and not yet ready for widespread use. 
  Proceed with caution

Code
save_plot("5_3_descriptive_plot_correlations_rq5y", width = 5.3, height = 5.3)

# Missing-data correlation plot for the same columns as the correlation plot
df %>%
  select(amohqualn, all_of(rq5y)) %>%
  setNames(c("Y1: Mother Education", rq5y_labels_short)) %>%
  as.data.frame() %>%
  gbtoolbox::plot_missing_correlations(cluster_variables = FALSE, n_decimal_places = 2)
Warning in gbtoolbox::plot_missing_correlations(., n_decimal_places = 2, : This function is in early beta, and not yet ready for widespread use. 
  Proceed with caution

Code
save_plot("5_3_descriptive_plot_missing_correlations_rq5y", width = 5.3, height = 5.3)

# Takes a long time to run with all the variables in, so it is disabled.
if (FALSE) {
  df %>%
    select(any_of(c(rq5y, rq5z))) %>%
    select(where(is.numeric)) %>%
    # all_of(): rq5y is an external character vector; passing it bare to
    # select() is deprecated in tidyselect. Puts the rq5y columns first.
    select(all_of(rq5y), everything()) %>%
    as.data.frame() %>%
    gbtoolbox::plot_missing_correlations(cluster_variables = FALSE, textadjust = 0)
}

Missing data frequency plot

Click here for full-size plot

Code
# Proportion of non-missing values for each auxiliary (rq5z) variable, drawn
# as horizontal bars with a reference line at 0.2.
df %>%
  select(any_of(rq5z)) %>%
  # Because the data is in a long format, we don't need the twin 2 variables.
  # Only drop a "...2" column when its "...1" counterpart is also in rq5z, so
  # that non-twin variables whose name happens to end in "2" (e.g. amosoc2)
  # are kept.
  select(!any_of(rq5z[grepl("2$", rq5z) & sub("2$", "1", rq5z) %in% rq5z])) %>%
  # Per column: number of non-NA entries divided by the column length
  apply(.,2,function(x) length(which(!is.na(x)))/length(x)) %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  `colnames<-`(c("var","percent_notmissing")) %>%
  mutate(
    # var       = factor(var, levels = rq5z),
    # Use var_to_label()'s result when its first element is non-NULL,
    # otherwise fall back to the variable name itself
    var_label = sapply(var, function(x) ifelse(!is.null(var_to_label(x)[[1]]), var_to_label(x), x)),
    var_label = paste0(var_label, " (", var, ")"),
    # Factor levels are fixed here, in the current row order
    var_label = factor(var_label, levels = var_label)
  ) %>%                                                                          #pull(var_label) %>% duplicated() %>% table()
  # NOTE(review): the factor levels were set before this arrange(), so the
  # sort presumably does not change the order of the bars -- confirm intent.
  arrange(percent_notmissing) %>%
  ggplot(aes(x = percent_notmissing, y = var_label)) +
  geom_col() +
  geom_vline(aes(xintercept=.2)) +
  theme_bw() +
  labs(x="Percent of not-missing data", y = NULL)

Code
# Save the auxiliary-variable plot
save_plot("5_3_descriptive_missing_data_frequency_auxillaryvars", width = 12, height = 32)

# Proportion of non-missing values for each rq5y variable, as labelled bars
df %>%
  select(any_of(rq5y)) %>%
  # filter(rowSums(!is.na(.)) > 0) %>%
  vapply(function(col) sum(!is.na(col)) / length(col), numeric(1)) %>%
  as.data.frame() %>%
  rownames_to_column() %>%
  setNames(c("var", "percent_notmissing")) %>%
  mutate(
    # Reversed levels, paired with the matching short labels
    var_label = factor(var, levels = rev(rq5y), labels = rev(rq5y_labels_short))
  ) %>%
  ggplot(aes(x = percent_notmissing, y = var_label)) +
  geom_col() +
  geom_vline(aes(xintercept = .2)) +
  geom_text(
    aes(label = paste0(round(percent_notmissing * 100), "%")),
    hjust = 1.2
  ) +
  theme_bw() +
  labs(x = "Percent of not-missing data", y = NULL)

Code
# Save the rq5y missing-data frequency plot
# NOTE(review): width/height are the same as for the much longer
# auxiliary-variable plot -- confirm these dimensions are intended.
save_plot("5_3_descriptive_missing_data_frequency", width = 12, height = 32)

# Proportion of non-missing values for each rq5y variable, split by cohort,
# drawn as dodged horizontal bars with a dashed reference line at 0.2.
df %>%
  select(cohort, any_of(rq5y)) %>%
  # One row per (participant row, variable)
  pivot_longer(cols = -cohort, names_to = "var", values_to = "value") %>%
  # Reversed levels, paired with the matching short labels
  mutate(var = factor(var, levels = rev(rq5y), labels = rev(rq5y_labels_short))) %>%
  group_by(cohort, var) %>%
  summarise(
    total_n = dplyr::n(),
    not_missing_n = sum(!is.na(value)),
    percent_notmissing = not_missing_n / total_n,
    .groups = "drop"
  ) %>%
    mutate(
      # var_label = factor(var, levels = unique(var), labels = var_to_label(unique(var))),
      # Shorten the cohort labels to "1".."4"; levels are listed from cohort 4
      # down to cohort 1, and the legend order is reversed again below
      cohort = factor(cohort,
        levels = c("Cohort 4: twins born Sep-96 to Dec-96",
                   "Cohort 3: twins born Sep-95 to Aug-96",
                   "Cohort 2: twins born Sep-94 to Aug-95",
                   "Cohort 1: twins born Jan-94 to Aug-94"),
        labels = c("4", "3", "2", "1")
      )
    ) %>%
  ggplot(aes(x = percent_notmissing, y = var, fill = cohort)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_vline(aes(xintercept = 0.2), linetype = "dashed") +
  # Percentage label for each bar, dodged with the same width as the bars
  geom_text(
    aes(label = paste0(round(percent_notmissing*100), "%")),
    position = position_dodge(width = 0.8),
    hjust = -.2,
    size = 3
  ) +
  theme_bw() +
  labs(
    x = "Percent of not-missing data BY COHORT",
    y = NULL,
    fill = "Cohort"
  ) +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 8)
  ) + 
  guides(fill = guide_legend(reverse = TRUE))

Code
# Save the by-cohort plot (save_plot is defined outside this file; presumably
# it writes the most recent plot -- confirm)
save_plot("5_3_descriptive_missing_data_frequency_bycohort", width = 8, height = 13)

Descriptive table of imputation variables

Code
  # Summarise the distinct values of one variable as a single string.
  #
  # x: a vector (one data-frame column).
  # Returns a length-1 character vector:
  #   - factor: all declared levels, comma-separated;
  #   - otherwise: the distinct non-missing values, comma-separated, when there
  #     are at most 6 of them, and "" when there are more.
  get_levels_summary <- function(x) {
    if (is.factor(x)) {
      return(paste(levels(x), collapse = ", "))
    }

    distinct_vals <- na.omit(unique(x))
    if (length(distinct_vals) > 6) {
      # Too many values to list
      # paste(length(unique_vals), "unique values")
      return(paste(""))
    }

    paste(distinct_vals, collapse = ", ")
  }

# List of all variables for imputation
df_rq5 <- df %>%
  select(any_of(rq5z))

# Descriptive information on each imputed variable (one row per column of
# df_rq5). vapply() guarantees exactly one value of the stated type per column;
# the as.character()/as.numeric() wrappers drop the names so that only
# `levels` carries names into data.frame().
impute_df <- data.frame(
  var   = colnames(df_rq5),
  label = as.character(vapply(
    df_rq5,
    function(x) {
      # exact = TRUE: do not partially match another attribute (e.g. "labels")
      lab <- attr(x, "label", exact = TRUE)
      if (is.null(lab)) NA_character_ else as.character(lab)[1]
    },
    character(1)
  )),
  levels = map_chr(df_rq5, get_levels_summary),
  # First class only, so multi-class columns still give a single string
  class = as.character(vapply(df_rq5, function(x) class(x)[1], character(1))),
  # Percentage of non-missing values, rounded to a whole number
  perc_not_missing = as.numeric(vapply(
    df_rq5,
    function(x) round(sum(!is.na(x)) / length(x) * 100),
    numeric(1)
  )),
  # SD after as.numeric() conversion of the column
  sd = round(as.numeric(vapply(
    df_rq5,
    function(x) sd(as.numeric(x), na.rm = TRUE),
    numeric(1)
  )), 2),
  # Number of distinct non-missing values
  distinct_categories = as.numeric(vapply(
    df_rq5,
    function(x) length(na.omit(unique(x))),
    integer(1)
  ))
)

# Assign a display label from the first letter of the variable name.
# NOTE(review): any name starting with "c" maps to "Year 3", which includes
# `cohort` and `cens01pop98density` -- confirm that is intended.
impute_df <- impute_df %>%
  mutate(variable_year = case_when(
    str_starts(var, "a") ~ "Year 1 (1st Contact)",
    str_starts(var, "b") ~ "Year 2",
    str_starts(var, "c") ~ "Year 3",
    str_starts(var, "d") ~ "Year 4",
    str_starts(var, "g") ~ "Year 7",
    str_starts(var, "h") ~ "Year 8",
    str_starts(var, "i") ~ "Year 9",
    str_starts(var, "j") ~ "Year 10",
    str_starts(var, "l") ~ "Year 12",
    str_starts(var, "n") ~ "Year 14",
    str_starts(var, "p") ~ "Year 16",
    str_starts(var, "r") ~ "Year 18",
    str_starts(var, "u") ~ "Year 21",
    str_starts(var, "z") ~ "Year 26",
    TRUE ~ "Other"
  ))


# Print the table, least-complete variables first
impute_df %>%
  arrange(perc_not_missing) %>%
  knitr::kable()
var label levels class perc_not_missing sd distinct_categories variable_year
lp3ac1 lp3ac1 3-subject (English, maths, science) overall mean NC level from parent-reported school report at 12, 2-7 numeric 14 0.80 16 Year 12
lp3ac2 lp3ac2 3-subject (English, maths, science) overall mean NC level from parent-reported school report at 12, 2-7 numeric 14 0.80 16 Year 12
jcnv1 jcnv1 Child Non-verbal composite (10 year twin web), standardised numeric 20 1.00 1034 Year 10
jcnv2 jcnv2 Child Non-verbal composite (10 year twin web), standardised numeric 20 1.00 1032 Year 10
jcvb1 jcvb1 Child Verbal composite (10 year twin web), standardised numeric 20 1.00 2002 Year 10
jcvb2 jcvb2 Child Verbal composite (10 year twin web), standardised numeric 20 1.00 1998 Year 10
it3ac1 it3ac1 3-subject (English, maths, science) mean NC level (9 year teacher), 1-5 numeric 21 0.58 78 Year 9
npks3t3a1 npks3t3a1 End of KS3 3-subject Academic achievement mean level (from parent SLQ), 1-9 numeric 21 0.97 25 Year 14
npks3t3a2 npks3t3a2 End of KS3 3-subject Academic achievement mean level (from parent SLQ), 1-9 numeric 21 0.98 25 Year 14
pcchatot1 pcchatot1 Chaos total score (child web at 16), 0-12 numeric 21 2.04 20 Year 16
pcchatot2 pcchatot2 Chaos total score (child web at 16), 0-12 numeric 21 2.04 20 Year 16
jt3ac1 jt3ac1 3-subject (English, maths, science) mean NC level (10 year teacher), 1-5 numeric 22 0.66 82 Year 10
icvb1 icvb1 Verbal composite (9 year child), standardised numeric 25 1.00 399 Year 9
icvb2 icvb2 Verbal composite (9 year child), standardised numeric 25 1.00 398 Year 9
icnv1 icnv1 Non-Verbal composite (9 year child), standardised numeric 25 1.00 440 Year 9
icnv2 icnv2 Non-Verbal composite (9 year child), standardised numeric 25 1.00 439 Year 9
icsdqcont1 icsdqcont1 SDQ Conduct scale (child self-report) at 9, 0-10 numeric 25 1.86 20 Year 9
icsdqcont2 icsdqcont2 SDQ Conduct scale (child self-report) at 9, 0-10 numeric 25 1.85 20 Year 9
icsdqhypt1 icsdqhypt1 SDQ Hyperactivity scale (child self-report) at 9, 0-10 numeric 25 2.28 21 Year 9
icsdqhypt2 icsdqhypt2 SDQ Hyperactivity scale (child self-report) at 9, 0-10 numeric 25 2.28 21 Year 9
ipsdqhypt1 ipsdqhypt1 SDQ Hyperactivity scale (parent) at 9, 0-10 numeric 25 2.39 21 Year 9
ipsdqhypt2 ipsdqhypt2 SDQ Hyperactivity scale (parent) at 9, 0-10 numeric 25 2.39 21 Year 9
ipsdqcont1 ipsdqcont1 SDQ Conduct scale (parent) at 9, 0-10 numeric 25 1.45 16 Year 9
ipsdqcont2 ipsdqcont2 SDQ Conduct scale (parent) at 9, 0-10 numeric 25 1.45 16 Year 9
icparnegt1 icparnegt1 Negative parental feelings scale, child-self-rated at 9, 0-8 numeric 25 1.83 13 Year 9
icparnegt2 icparnegt2 Negative parental feelings scale, child-self-rated at 9, 0-8 numeric 25 1.83 13 Year 9
ipanxt1 ipanxt1 ARBQ overall anxiety total (9 year parent), 0-50 numeric 25 6.56 134 Year 9
ipanxt2 ipanxt2 ARBQ overall anxiety total (9 year parent), 0-50 numeric 25 6.58 135 Year 9
ipchatot ipchatot Parent Chaos scale at 9, 0-12 numeric 25 2.14 19 Year 9
ncconint1 ncconint1 Conners Inattention scale at 14 (child), 0-27 numeric 25 4.62 57 Year 14
ncconint2 ncconint2 Conners Inattention scale at 14 (child), 0-27 numeric 25 4.63 57 Year 14
ncconhit1 ncconhit1 Conners Hyperactivity-Impulsivity scale at 14 (child), 0-27 numeric 25 4.31 57 Year 14
ncconhit2 ncconhit2 Conners Hyperactivity-Impulsivity scale at 14 (child), 0-27 numeric 25 4.31 57 Year 14
npconnt1 npconnt1 Conners total scale at 14 (parent), 0-54 numeric 25 8.43 129 Year 14
npconnt2 npconnt2 Conners total scale at 14 (parent), 0-54 numeric 25 8.44 129 Year 14
ncparnegt1 ncparnegt1 Negative Parental Feelings scale at 14 (child), 0-8 numeric 25 1.96 13 Year 14
ncparnegt2 ncparnegt2 Negative Parental Feelings scale at 14 (child), 0-8 numeric 25 1.96 13 Year 14
ncchato1 ncchato1 Chaos at home total scale at 14 (child), 0-12 numeric 25 1.95 23 Year 14
ncchato2 ncchato2 Chaos at home total scale at 14 (child), 0-12 numeric 25 1.95 23 Year 14
cens01pop98density cens01pop98density Census data 2001 (code KS001) linked to 1998 postcode: population density, N per hectare numeric 27 33.88 39 Year 3
zmhsdqcont1 zmhsdqcont1 SDQ Conduct total score (TEDS26 twin MHQ), 0-10 numeric 30 1.10 11 Year 26
zmhsdqcont2 zmhsdqcont2 SDQ Conduct total score (TEDS26 twin MHQ), 0-10 numeric 30 1.10 11 Year 26
zmhsdqhypt1 zmhsdqhypt1 SDQ Hyperactivity total score (TEDS26 twin MHQ), 0-10 numeric 30 2.19 13 Year 26
zmhsdqhypt2 zmhsdqhypt2 SDQ Hyperactivity total score (TEDS26 twin MHQ), 0-10 numeric 30 2.18 13 Year 26
u1csdqcont1 u1csdqcont1 SDQ Conduct total score (TEDS21 phase 1 twin qnr), 0-10 numeric 35 1.32 15 Year 21
u1csdqcont2 u1csdqcont2 SDQ Conduct total score (TEDS21 phase 1 twin qnr), 0-10 numeric 35 1.32 15 Year 21
u1csdqhypt1 u1csdqhypt1 SDQ Hyperactivity total score (TEDS21 phase 1 twin qnr), 0-10 numeric 35 2.19 15 Year 21
u1csdqhypt2 u1csdqhypt2 SDQ Hyperactivity total score (TEDS21 phase 1 twin qnr), 0-10 numeric 35 2.19 15 Year 21
pcbhsdqcont1 pcbhsdqcont1 SDQ Conduct scale (child behaviour qnr at 16), 0-10 numeric 38 1.47 20 Year 16
pcbhsdqcont2 pcbhsdqcont2 SDQ Conduct scale (child behaviour qnr at 16), 0-10 numeric 38 1.48 20 Year 16
pcbhsdqhypt1 pcbhsdqhypt1 SDQ Hyperactivity scale (child behaviour qnr at 16), 0-10 numeric 38 2.31 20 Year 16
pcbhsdqhypt2 pcbhsdqhypt2 SDQ Hyperactivity scale (child behaviour qnr at 16), 0-10 numeric 38 2.31 20 Year 16
ppbhsdqcont1 ppbhsdqcont1 SDQ Conduct scale (parent behaviour qnr at 16), 0-10 numeric 38 1.39 19 Year 16
ppbhsdqcont2 ppbhsdqcont2 SDQ Conduct scale (parent behaviour qnr at 16), 0-10 numeric 38 1.40 20 Year 16
ppbhsdqhypt1 ppbhsdqhypt1 SDQ Hyperactivity scale (parent behaviour qnr at 16), 0-10 numeric 38 1.98 19 Year 16
ppbhsdqhypt2 ppbhsdqhypt2 SDQ Hyperactivity scale (parent behaviour qnr at 16), 0-10 numeric 38 1.99 19 Year 16
ppbhanxt1 ppbhanxt1 ARBQ Anxiety overall total scale (parent behaviour qnr at 16), 0-38 numeric 38 4.19 83 Year 16
ppbhanxt2 ppbhanxt2 ARBQ Anxiety overall total scale (parent behaviour qnr at 16), 0-38 numeric 38 4.21 84 Year 16
gcl2 gcl2 Child Verbal/Language composite (7 year twin phone), standardised numeric 39 1.00 321 Year 7
gcl1 gcl1 Child Verbal/Language composite (7 year twin phone), standardised numeric 40 1.00 321 Year 7
gcn1 gcn1 Child Non-verbal composite (7 year twin phone), standardised numeric 40 1.00 232 Year 7
gcn2 gcn2 Child Non-verbal composite (7 year twin phone), standardised numeric 40 1.00 231 Year 7
lcsdqhypt1 lcsdqhypt1 SDQ Hyperactivity scale (child self-report) at 12, 0-10 numeric 44 2.30 21 Year 12
lcsdqhypt2 lcsdqhypt2 SDQ Hyperactivity scale (child self-report) at 12, 0-10 numeric 44 2.31 21 Year 12
lcsdqcont1 lcsdqcont1 SDQ Conduct scale (child self-report) at 12, 0-10 numeric 44 1.66 21 Year 12
lcsdqcont2 lcsdqcont2 SDQ Conduct scale (child self-report) at 12, 0-10 numeric 44 1.66 21 Year 12
lpsdqhypt1 lpsdqhypt1 SDQ Hyperactivity scale (parent) at 12, 0-10 numeric 44 2.27 21 Year 12
lpsdqhypt2 lpsdqhypt2 SDQ Hyperactivity scale (parent) at 12, 0-10 numeric 44 2.28 21 Year 12
lpsdqcont1 lpsdqcont1 SDQ Conduct scale (parent) at 12, 0-10 numeric 44 1.47 18 Year 12
lpsdqcont2 lpsdqcont2 SDQ Conduct scale (parent) at 12, 0-10 numeric 44 1.47 18 Year 12
lcmfqt1 lcmfqt1 MFQ scale from 11 MFQ items (child self-report) at 12, 0-22 numeric 44 3.33 43 Year 12
lcmfqt2 lcmfqt2 MFQ scale from 11 MFQ items (child self-report) at 12, 0-22 numeric 44 3.34 43 Year 12
lpmfqt1 lpmfqt1 MFQ scale from 11 MFQ items (parent) at 12, 0-22 numeric 44 2.33 46 Year 12
lpmfqt2 lpmfqt2 MFQ scale from 11 MFQ items (parent) at 12, 0-22 numeric 44 2.33 46 Year 12
lpconnt1 lpconnt1 Conners ADHD overall scale (parent) at 12, 0-54 numeric 44 8.63 142 Year 12
lpconnt2 lpconnt2 Conners ADHD overall scale (parent) at 12, 0-54 numeric 44 8.67 142 Year 12
lcparnegt1 lcparnegt1 Parental Feelings negative subscale (child self-reported) at 12, 0-8 numeric 44 1.80 13 Year 12
lcparnegt2 lcparnegt2 Parental Feelings negative subscale (child self-reported) at 12, 0-8 numeric 44 1.81 13 Year 12
lpchatot lpchatot Chaos scale (parent-reported, from 5 items) at 12, 0-10 numeric 44 1.83 15 Year 12
lcchato1 lcchato1 Chaos scale (child self-reported, from 6 items) at 12, 0-12 numeric 44 2.04 23 Year 12
lcchato2 lcchato2 Chaos scale (child self-reported, from 6 items) at 12, 0-12 numeric 44 2.04 23 Year 12
gt2ac1 gt2ac1 2-subject (maths, English) mean NC level (7 year teacher), 0-4 numeric 46 0.54 44 Year 7
gt2ac2 gt2ac2 2-subject (maths, English) mean NC level (7 year teacher), 0-4 numeric 46 0.55 44 Year 7
hconnt1 hconnt1 Conners ADHD overall Total at 8 (0-54) numeric 50 9.37 167 Year 8
hconnt2 hconnt2 Conners ADHD overall Total at 8 (0-54) numeric 50 9.44 167 Year 8
dscv1 dscv1 Verbal cognitive ability composite (4 Year), standardised numeric 57 1.00 126 Year 4
dscv2 dscv2 Verbal cognitive ability composite (4 Year), standardised numeric 57 1.00 126 Year 4
gpsdqhypt1 gpsdqhypt1 SDQ hyperactivity total (7 year parent), 0-10 numeric 57 2.56 20 Year 7
gpsdqhypt2 gpsdqhypt2 SDQ hyperactivity total (7 year parent), 0-10 numeric 57 2.56 20 Year 7
gpsdqcont1 gpsdqcont1 SDQ conduct total (7 year parent), 0-10 numeric 57 1.64 17 Year 7
gpsdqcont2 gpsdqcont2 SDQ conduct total (7 year parent), 0-10 numeric 57 1.64 17 Year 7
gpanxt1 gpanxt1 ARBQ overall anxiety total (7 year parent), 0-52 numeric 57 6.77 135 Year 7
gpanxt2 gpanxt2 ARBQ overall anxiety total (7 year parent), 0-52 numeric 57 6.78 137 Year 7
dscnv1 dscnv1 Non-verbal cognitive ability composite (4 Year), standardised numeric 59 1.00 608 Year 4
dscnv2 dscnv2 Non-verbal cognitive ability composite (4 Year), standardised numeric 59 1.00 608 Year 4
dsdqcont1 dsdqcont1 SDQ Conduct scale (4 Year), 0-10 numeric 59 1.56 21 Year 4
dsdqcont2 dsdqcont2 SDQ Conduct scale (4 Year), 0-10 numeric 59 1.56 21 Year 4
dsdqhypt1 dsdqhypt1 SDQ Hyperactivity scale (4 Year), 0-10 numeric 59 2.33 21 Year 4
dsdqhypt2 dsdqhypt2 SDQ Hyperactivity scale (4 Year), 0-10 numeric 59 2.34 21 Year 4
danxt1 danxt1 ARBQ total scale (4 Year), 0-36 numeric 59 4.80 104 Year 4
danxt2 danxt2 ARBQ total scale (4 Year), 0-36 numeric 59 4.82 105 Year 4
dchatot dchatot Chaos overall scale (4 Year), standardised numeric 59 1.00 466 Year 4
atwclub atwclub Member of a Twins Club (1st Contact), 1Y 0N 0, 1 factor 97 0.47 2 Year 1 (1st Contact)
amumagetw amumagetw Age in years of natural mother at time of birth of twins numeric 98 4.81 31 Year 1 (1st Contact)
asingle asingle Single Parent cohabiting biological mother and father / cohabiting biological parent with other, single parent factor 98 0.26 2 Year 1 (1st Contact)
amohqualn amohqualn Maternal Education (formatted as numeric) numeric 98 1.99 8 Year 1 (1st Contact)
amosoc2 amosoc2 Mother SOC employment level (1st Contact), 1-9 caring for children at home, 1, 2, 3, 4, 5, 6, 7, 8, 9, no job factor 99 3.66 11 Year 1 (1st Contact)
alang alang Main language spoken at home (1st Contact), see value labels other, English, English + other factor 99 0.20 3 Year 1 (1st Contact)
cohort cohort School cohort, see value labels Cohort 1: twins born Jan-94 to Aug-94, Cohort 2: twins born Sep-94 to Aug-95, Cohort 3: twins born Sep-95 to Aug-96, Cohort 4: twins born Sep-96 to Dec-96 factor 100 0.95 4 Year 3
aethnic aethnic Ethnic origin of twins, simplified coding (1st Contact), 1=white 0=other 1, 0 numeric 100 0.27 2 Year 1 (1st Contact)
anoldsibn anoldsibn Number of older siblings (formatted as numeric variable) 1, 0, 2, 3, 4, 5 numeric 100 0.96 6 Year 1 (1st Contact)
anyngsibn anyngsibn Number of younger siblings (formatted as numeric variable) 0, 1, 2 numeric 100 0.21 3 Year 1 (1st Contact)
asmoke asmoke Smoked cigarettes while pregnant (1st Contact), 1Y 0N 0, 1 factor 100 0.39 2 Year 1 (1st Contact)

Missing data proportions by group

Interestingly, MZ twins have less missing data than DZ twins (proportion missing 0.72 vs 0.75 for males and 0.58 vs 0.61 for females; see the table below).

Code
# Proportion of missing rq5y cells within each sexzyg group
df %>%
  select(sexzyg, any_of(rq5y)) %>%
  # Missing rq5y cells per row, counted before grouping
  mutate(row_missing = rowSums(is.na(select(., -sexzyg)))) %>%
  group_by(sexzyg) %>%
  summarise(
    n = dplyr::n(),
    total_cells = n * length(rq5y),
    missing_cells = as.integer(sum(row_missing)),
    overall_prop_missing = missing_cells / total_cells,
    .groups = "drop"
  ) %>%
  knitr::kable(digits = 2)
sexzyg n total_cells missing_cells overall_prop_missing
MZ male 4210 75780 54394 0.72
DZ male 4362 78516 59108 0.75
MZ female 4537 81666 47036 0.58
DZ female 4178 75204 45573 0.61
DZ opposite sexes 8753 157554 110609 0.70
Code
# Gives the same results! 
# df %>%
#   select(sexzyg, any_of(rq5y)) %>%
#   pivot_longer(cols = any_of(rq5y)) %>%
#   group_by(sexzyg) %>%
#   summarise(
#     total_cells = length(value),
#     missing_cells = sum(is.na(value)),
#     overall_prop_missing = missing_cells / total_cells,
#     .groups = "drop"
#   ) %>%
#   knitr::kable(digits =2)

Number of missing cells per participant for rq5y variables

The table below shows how many of the 18 key outcome variables each participant is missing. The variables are: G composite scale from child web tests at 12, standardised; G composite scale from child web tests at 14, standardised; G composite scale from child web tests at 16, standardised; G-game overall total score, 0-40; End of KS3 all-subject Academic achievement mean level (from parent SLQ), 1-9; Core subjects (English, maths, science): mean grade in GCSE results (twin exams at 16), 4-11; Twin probable highest level of qualification including current study (TEDS21 phase 1 twin qnr), 1-11 see value labels; Demographics item: highest qualification ordinal level (TEDS26 twin MHQ), see value labels; MFQ scale from 11 MFQ items (child self-report) at 12, 0-22; MFQ total scale (child behaviour qnr at 16), 0-26; MFQ overall total score (TEDS21 phase 1 twin qnr), 0-16; MFQ overall total score (TEDS26 twin MHQ), 0-26; General Anxiety overall total score (TEDS21 phase 2 twin qnr), 0-40; GAD-D (General Anxiety) overall total score (TEDS26 twin MHQ), 0-40; SDQ Externalising scale at 12; SDQ Externalising scale at 16; SDQ Externalising scale at 21; SDQ Externalising scale at 26.

Code
# Distribution of the number of missing rq5y variables per row of df
df %>%
  select(any_of(rq5y)) %>%
  # Keep only the per-row count of missing cells
  transmute(missing_count = rowSums(is.na(.))) %>%
  count(missing_count) %>%
  mutate(
    percent = round(n / sum(n) * 100, 1),
    total_vars = length(rq5y)
  ) %>%
  knitr::kable(
    caption = "How many missing cells does each participant have on the key outcome variables? ",
    col.names = c("# Missing Variables Per Pps", "N pps", "% pps", "Total Variables")
  )
How many missing cells does each participant have on the key outcome variables?
# Missing Variables Per Pps N pps % pps Total Variables
0 467 1.8 18
1 1032 4.0 18
2 1096 4.2 18
3 938 3.6 18
4 768 2.9 18
5 797 3.1 18
6 909 3.5 18
7 886 3.4 18
8 833 3.2 18
9 910 3.5 18
10 921 3.5 18
11 914 3.5 18
12 889 3.4 18
13 1030 4.0 18
14 1175 4.5 18
15 1254 4.8 18
16 1067 4.1 18
17 1311 5.0 18
18 8843 34.0 18

Missing data flux

Code
# Flux plot of the rq1x and rq5z variables present in df
mice::fluxplot(select(df, any_of(c(rq1x, rq5z))))

Plot illustrating how imputation works

Code
# One histogram of lcg1 values per participant, for 30 randomly chosen
# participants.
set.seed(10)
df_rq5_imputed %>%
  # Sample from the distinct ids: each participant has several rows in the
  # imputed data, so sampling the raw id column could pick the same id twice
  # and yield fewer than 30 participants.
  filter(randomtwinid2 %in% sample(unique(.$randomtwinid2), 30)) %>%
  # Relabel the sampled ids as pps1, pps2, ...
  mutate(id = factor(randomtwinid2, labels = paste0("pps", seq_along(unique(.$randomtwinid2))))) %>%
  mutate(value = lcg1) %>%
  ggplot(aes(x = value)) +
  geom_histogram(bins = 10) +
  facet_wrap(~id, ncol = 5, scales = "fixed") +
  labs(
    # The score is on the x axis; the y axis is the histogram count
    x = "General Cognitive Ability scores at age 12 (standardised; lcg1)",
    # title = NULL,
    y = "Count"
  ) +
  theme_bw()

Code
# NOTE(review): this is the only save_plot() call here whose name includes a
# ".pdf" extension -- confirm save_plot() does not append another one.
save_plot("5_3_descriptive_imputation_distribution.pdf", width = 9, height = 6)

# SD of lcg1 in df, ignoring missing values (TRUE rather than the
# reassignable shorthand T)
sd(df$lcg1, na.rm = TRUE)
[1] 1

Changes in means and variances

GT table

Code
library(gt)

# Table of changes between imputed and unimputed data: mean difference (md),
# standardised mean difference (smd) and variance change (var), one row per
# rq5y outcome, each with an estimate, adjusted p-value and interval bounds.
bootstrap_summary_df %>%
  filter(parameter %in% c("md", "smd", "var")) %>%
  ungroup() %>%
  # Replace outcome variable names with their short display labels
  mutate(outcome = rq5y_labels_short[match(.$outcome, rq5y)]) %>%
  # group_by(parameter) %>%
  # mutate(pval_adj = stats::p.adjust(pval, method = "holm")) %>%
  select(-pd, -pval, -n) %>%
  select(-starts_with(".")) %>%
  # One row per outcome; columns become <statistic>_<parameter>
  pivot_wider(
    values_from = c("y", "ymin", "ymax", "pval_adj"),
    names_from = c("parameter")
  ) %>%
  # Arrange columns in order: md, smd, var
  select(outcome,
         y_md, pval_adj_md, ymin_md, ymax_md,
         y_smd, pval_adj_smd, ymin_smd, ymax_smd,
         y_var, pval_adj_var, ymin_var, ymax_var) %>%
  gt() %>%
  # apa_num formatting for every column except `outcome` (this already covers
  # pval_adj_var, so no separate fmt() call is needed for it)
  fmt(
    columns = !contains("parameter") & !contains("outcome"),
    fns = function(x) {gbtoolbox::apa_num(x, n_decimal_places = 3)}
  ) %>%
  # The variance estimate and bounds are shown as percentages instead
  fmt_percent(
    columns = c("y_var", "ymin_var", "ymax_var"),
    decimals = 2,
    drop_trailing_zeros = FALSE,
    drop_trailing_dec_mark = FALSE
  ) %>%
  # fmt_number(columns = starts_with("y_"), decimals = 3) %>%
  # fmt_number(columns = starts_with("ymin_"), decimals = 3) %>%
  # fmt_number(columns = starts_with("ymax_"), decimals = 3) %>%
  # fmt_scientific(columns = starts_with("pval_adj_"), decimals = 3)
  # Color code based on significance - separate rules for each statistic:
  # red fill for adjusted p < .05 with a negative estimate, green for positive
  tab_style(
    style = list(cell_fill(color = "#ffcccc")),
    locations = cells_body(
      columns = c(y_md, pval_adj_md, ymin_md, ymax_md),
      rows = pval_adj_md < 0.05 & y_md < 0
    )
  ) %>%
  tab_style(
    style = list(cell_fill(color = "#ccffcc")),
    locations = cells_body(
      columns = c(y_md, pval_adj_md, ymin_md, ymax_md),
      rows = pval_adj_md < 0.05 & y_md > 0
    )
  ) %>%
  tab_style(
    style = list(cell_fill(color = "#ffcccc")),
    locations = cells_body(
      columns = c(y_smd, pval_adj_smd, ymin_smd, ymax_smd),
      rows = pval_adj_smd < 0.05 & y_smd < 0
    )
  ) %>%
  tab_style(
    style = list(cell_fill(color = "#ccffcc")),
    locations = cells_body(
      columns = c(y_smd, pval_adj_smd, ymin_smd, ymax_smd),
      rows = pval_adj_smd < 0.05 & y_smd > 0
    )
  ) %>%
  tab_style(
    style = list(cell_fill(color = "#ffcccc")),
    locations = cells_body(
      columns = c(y_var, pval_adj_var, ymin_var, ymax_var),
      rows = pval_adj_var < 0.05 & y_var < 0
    )
  ) %>%
  tab_style(
    style = list(cell_fill(color = "#ccffcc")),
    locations = cells_body(
      columns = c(y_var, pval_adj_var, ymin_var, ymax_var),
      rows = pval_adj_var < 0.05 & y_var > 0
    )
  ) %>%
  # Add column labels
  cols_label(
    outcome = "Variable",
    y_md = "Est", pval_adj_md = "p", ymin_md = "LB", ymax_md = "UB",
    y_smd = "Est", pval_adj_smd = "p", ymin_smd = "LB", ymax_smd = "UB",
    y_var = "Est", pval_adj_var = "p", ymin_var = "LB", ymax_var = "UB"
  ) %>%
  # Add overarching CI headers
  tab_spanner(label = "95% CI", columns = c(ymin_md, ymax_md), id = "ci_md") %>%
  tab_spanner(label = "95% CI", columns = c(ymin_smd, ymax_smd), id = "ci_smd") %>%
  tab_spanner(label = "95% CI", columns = c(ymin_var, ymax_var), id = "ci_var") %>%
  # Add spanning headers for each statistic with formulas - simple format
  tab_spanner(label = md("Mean Difference<br>X̄<sub>imputed</sub> - X̄<sub>unimputed</sub>"), columns = c(y_md, pval_adj_md, ymin_md, ymax_md), id = "md_main") %>%
  tab_spanner(label = md("Standardized Mean Difference<br>(X̄<sub>imputed</sub> - X̄<sub>unimputed</sub>) /<br>σ<sub>unimputed</sub>"), columns = c(y_smd, pval_adj_smd, ymin_smd, ymax_smd), id = "smd_main") %>%
  tab_spanner(label = md("Variance % Change<br>(σ²<sub>imputed</sub> - σ²<sub>unimputed</sub>) /<br>σ²<sub>unimputed</sub> × 100"), columns = c(y_var, pval_adj_var, ymin_var, ymax_var), id = "var_main") %>%
  # Add footnote for p-values (closing parenthesis added to the text)
  tab_footnote(
    footnote = "P values are Bonferroni-Holm adjusted within each statistic type (Mean Difference, SMD, Variance Difference, etc.)",
    locations = cells_column_labels(columns = contains("pval_adj")),
    placement = "right"
  ) %>%
  # Style table - uniform formatting for spanners
  tab_style(
    style = cell_text(size = px(10), v_align = "middle"),
    locations = cells_column_spanners(spanners = c("md_main", "smd_main", "var_main"))
  ) %>%
  tab_options(
    column_labels.padding = px(0),
    table.font.size = px(9)
  ) %>%
  # Standardize column widths
  cols_width(
    outcome ~ px(80),
    c(y_md, y_smd, y_var) ~ px( 45),
    c(pval_adj_md, pval_adj_smd, pval_adj_var) ~ px(38),
    c(ymin_md, ymax_md, ymin_smd, ymax_smd, ymin_var, ymax_var) ~ px(45)
  )
Mean Difference
X̄imputed - X̄unimputed
Standardized Mean Difference
(X̄imputed - X̄unimputed) /
σunimputed
Variance % Change
(σ²imputed - σ²unimputed) /
σ²unimputed × 100
Variable Est p1
95% CI
Est p1
95% CI
Est p1
95% CI
LB UB LB UB LB UB
Y12: Cognitive ability -.032 .004 -.052 -.012 -.032 .004 -.053 -.012 4.96% .002 2.12% 8.03%
Y14: Cognitive ability -.119 .000 -.151 -.086 -.120 .000 -.153 -.087 15.76% .000 10.63% 21.14%
Y16: Cognitive ability -.105 .000 -.167 -.045 -.105 .000 -.168 -.046 16.65% .000 10.60% 23.18%
Y21: G-game total score -1.290 .000 -1.572 -1.006 -.193 .000 -.237 -.150 21.70% .000 15.66% 28.02%
Y14: KS3 academic achievement -.047 .001 -.072 -.024 -.061 .001 -.093 -.030 −8.30% .005 −13.40% −2.88%
Y16: GCSE core subjects grade -.094 .000 -.111 -.078 -.078 .000 -.092 -.064 6.82% .000 4.48% 9.45%
Y21: Highest qualification -.270 .000 -.316 -.224 -.140 .000 -.166 -.115 25.19% .000 19.98% 30.67%
Y26: Highest qualification -.254 .000 -.305 -.205 -.130 .000 -.157 -.104 25.68% .000 20.07% 31.71%
Y12: Depression (MFQ) .144 .000 .092 .200 .043 .000 .027 .061 15.58% .000 9.80% 22.28%
Y16: Depression (MFQ) .403 .000 .304 .505 .091 .000 .068 .114 23.75% .000 16.74% 31.27%
Y21: Depression (MFQ) .358 .000 .268 .453 .088 .000 .065 .111 11.48% .000 7.85% 15.43%
Y26: Depression (MFQ) .375 .000 .226 .523 .061 .000 .036 .085 8.02% .000 4.09% 12.24%
Y21: Anxiety (GAD-D) .561 .000 .378 .749 .077 .000 .051 .103 15.84% .000 10.11% 22.15%
Y26: Anxiety (GAD-D) .339 .000 .159 .529 .048 .000 .022 .075 10.29% .000 5.15% 15.96%
Y12: Externalising .059 .005 .017 .102 .017 .005 .005 .030 4.32% .000 2.20% 6.63%
Y16: Externalising .308 .000 .246 .369 .095 .000 .076 .115 14.75% .000 11.01% 18.85%
Y21: Externalising .416 .000 .345 .486 .143 .000 .118 .168 19.88% .000 15.19% 25.15%
Y26: Externalising .400 .000 .328 .475 .148 .000 .120 .176 18.06% .000 12.92% 23.66%
1 P values are Bonferroni-Holm adjusted within each statistic type (Mean Difference, SMD, Variance Difference, etc.
Code
  # tab_style(
  #   style = cell_borders(sides = "right", color = "gray", weight = px(1)),
  #   locations = cells_body(columns = c(ymax_md, ymax_smd))
  # )

Changes in correlations

Code
# Label matrix: cell [i, j] holds "<rq5y[i]>-<rq5y[j]>"
test_correlation_matrix <- outer(unname(rq5y), unname(rq5y), paste, sep = "-")

# One label per variable pair, taken from the lower triangle (no diagonal).
# NOTE(review): this assumes the columns of `cor_resid` follow the same
# lower-triangle order -- confirm against the bootstrap code.
vars <- test_correlation_matrix[lower.tri(test_correlation_matrix, diag = FALSE)]

# Split each "a-b" label into its two variable names
x_var <- str_extract(vars, "^[^-]+")
y_var <- str_extract(vars, "[^-]+$")
missingcode <- paste0("missing", seq_along(vars))

# Stack the correlation results of every element of boot_compare_results:
# one row per (.imp, .boot), one column per variable pair.
cor_df <- do.call(rbind, lapply(seq_along(boot_compare_results), function(i) {
  result_df <- t(as.data.frame(boot_compare_results[[i]]$cor_resid))
  # rep()/seq_len() also behave correctly when there are zero rows
  data.frame(
    .imp = rep(i, nrow(result_df)),
    .boot = seq_len(nrow(result_df)),
    result_df,
    row.names = NULL
  )
}))

# Summarise each pair's column with .mean_qi_pd() and bind into one table
bootstrap_summary_cor <- apply(select(cor_df, -c(.imp, .boot)), 2, function(xx) .mean_qi_pd(xx)) %>%
  bind_rows() %>%
  ungroup()

# Check the pair labels line up BEFORE attaching them (the check used to run
# after the mutate() it was meant to guard).
if (length(vars) != nrow(bootstrap_summary_cor)) {
  stop(
    "Expected ", length(vars), " variable pairs but the correlation summary has ",
    nrow(bootstrap_summary_cor), " rows",
    call. = FALSE
  )
}

# Attach pair labels and Holm-adjust the p-values across all pairs
bootstrap_summary_cor <- bootstrap_summary_cor %>%
  mutate(
    vars  = vars,
    x_var = x_var,
    y_var = y_var,
    pval_adj = stats::p.adjust(pval, method = "holm")
  )

# Largest absolute changes first
bootstrap_summary_cor %>% arrange(desc(abs(y)))

Plot of changes in correlations

Code
# Lower-triangular matrix plot of the correlation changes, drawn by the
# plot_lower_triangular_matrix2() helper (defined outside this file), then
# given a title, tag, theme tweaks and a diverging fill scale.
bootstrap_summary_cor %>%
  # filter(x_var!=y_var) %>%             # These correlations should probs be removed from bootstrap code! 
  plot_lower_triangular_matrix2(
    variables = rq5y,
    labels    = rq5y_labels_short,
    method    = "none"
  ) +
  labs(
    title = "Correlation Change",
    subtitle = expression(r[imputed] - r[original]),
    caption = NULL,
    tag = "B2"
  ) +
  theme(
      plot.title = element_text(hjust = 0.5, size = 16),
      plot.subtitle = element_text(hjust = 0.5, size = 13.5, margin = margin(b = 0)),
      plot.tag = element_text(hjust = 0, vjust = 0, size = 30, face = "bold"),
      plot.tag.position = "topleft",
      panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
      plot.background = element_blank()
  ) +
  # Blue-white-red scale centred on 0. This replaces the fill scale already
  # set on the plot, which is why ggplot2 prints a "Scale for fill is already
  # present" message.
  scale_fill_gradient2(
      low = "#0571b0",
      mid = "white",
      high = "#ca0020",
      midpoint = 0,
      limits = c(-.185, .185),  # Set min and max here
      na.value = "white"
  ) 
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

Code
# Save the figure via the project's save_plot() helper (defined outside this
# section; presumably it writes the most recently drawn plot - TODO confirm).
# NOTE(review): trim = 2 here, whereas the other save_plot() calls in this file
# pass trim = TRUE - confirm this is intentional.
save_plot("5_correlation_residuals", width = 8, height = 8, trim = 2)

📊 View Plot

GT Results table

Code
# Table of correlation changes: one row per outcome pair, showing the estimate,
# its 95% interval bounds, and the unadjusted / Holm-adjusted p values.
# Rows with an adjusted p < .05 are shaded green (increase) or red (decrease).
cor_table = bootstrap_summary_cor %>%
  mutate(
    # Human-readable pair label built from the short outcome labels
    pair = paste(
      rq5y_labels_short[match(x_var, rq5y)],
      "×",
      rq5y_labels_short[match(y_var, rq5y)]
    )
  ) %>%
  select(pair, y, ymin, ymax, pval, pval_adj) %>%
  gt() %>%
  tab_options(table.width = px(800)) %>%

  # Column headers and the spanner grouping all statistics
  cols_label(
    pair     = "Variable Pair",
    y        = "Est",
    ymin     = "LB",
    ymax     = "UB",
    pval     = md("p<sub>unadj</sub>"),
    pval_adj = md("p<sub>adj</sub>")
  ) %>%
  tab_spanner(
    label   = "Correlation Difference",
    columns = c(y, ymin, ymax, pval, pval_adj)
  ) %>%

  # Estimates and p values share the same 3-decimal formatter
  fmt(
    columns = c(y, ymin, ymax, pval, pval_adj),
    fns = function(x) {gbtoolbox::apa_num(x, n_decimal_places = 3)}
  ) %>%

  # Uniform 10px font across spanners, body, column labels and footnotes
  tab_style(
    style = cell_text(size = px(10)),
    locations = list(
      cells_column_spanners(),
      cells_body(),
      cells_column_labels(),
      cells_footnotes()
    )
  ) %>%

  # Significant increases: light green
  tab_style(
    style = cell_fill(color = "#d5e8d4"),
    locations = cells_body(
      columns = c(y, ymin, ymax, pval, pval_adj),
      rows = pval_adj < 0.05 & y > 0
    )
  ) %>%

  # Significant decreases: light red
  tab_style(
    style = cell_fill(color = "#f8cecc"),
    locations = cells_body(
      columns = c(y, ymin, ymax, pval, pval_adj),
      rows = pval_adj < 0.05 & y < 0
    )
  ) %>%

  # Footnotes: general note, p-value adjustment, definition of the difference
  tab_footnote(
    footnote = md("<em>Note.</em> Est = Estimate, LB = Lower Bound 95% Confidence Interval, UB = Upper Bound 95% Confidence Interval. Significant (p<sub>Bonferroni-Holm</sub>) effects are highlighted in green (increases) or red (decreases)."),
    placement = "right"
  ) %>%
  tab_footnote(
    footnote = "P values are Bonferroni-Holm adjusted",
    locations = cells_column_labels(columns = pval_adj),
    placement = "right"
  ) %>%
  tab_footnote(
    footnote = md("Correlation difference = Cor<sub>imputed</sub> - Cor<sub>original</sub>"),
    locations = cells_column_spanners(spanners = "Correlation Difference"),
    placement = "right"
  ) %>%
  opt_footnote_marks(marks = c("*", "†", "‡"))

cor_table
Variable Pair
Correlation Difference*
Est LB UB punadj padj
Y14: Cognitive ability × Y12: Cognitive ability -.079 -.105 -.053 .000 .000
Y16: Cognitive ability × Y12: Cognitive ability -.124 -.158 -.091 .000 .000
Y21: G-game total score × Y12: Cognitive ability -.114 -.152 -.077 .000 .000
Y14: KS3 academic achievement × Y12: Cognitive ability -.057 -.089 -.024 .000 .000
Y16: GCSE core subjects grade × Y12: Cognitive ability -.010 -.028 .009 .300 1.000
Y21: Highest qualification × Y12: Cognitive ability -.019 -.048 .010 .205 1.000
Y26: Highest qualification × Y12: Cognitive ability -.029 -.060 .002 .066 1.000
Y12: Depression (MFQ) × Y12: Cognitive ability -.008 -.032 .015 .477 1.000
Y16: Depression (MFQ) × Y12: Cognitive ability -.018 -.045 .010 .201 1.000
Y21: Depression (MFQ) × Y12: Cognitive ability -.001 -.029 .028 .928 1.000
Y26: Depression (MFQ) × Y12: Cognitive ability -.004 -.036 .027 .790 1.000
Y21: Anxiety (GAD-D) × Y12: Cognitive ability -.010 -.040 .021 .517 1.000
Y26: Anxiety (GAD-D) × Y12: Cognitive ability -.008 -.039 .024 .620 1.000
Y12: Externalising × Y12: Cognitive ability .000 -.021 .022 .969 1.000
Y16: Externalising × Y12: Cognitive ability .007 -.017 .032 .555 1.000
Y21: Externalising × Y12: Cognitive ability -.003 -.033 .026 .831 1.000
Y26: Externalising × Y12: Cognitive ability -.010 -.043 .024 .547 1.000
Y16: Cognitive ability × Y14: Cognitive ability -.153 -.190 -.117 .000 .000
Y21: G-game total score × Y14: Cognitive ability -.150 -.191 -.107 .000 .000
Y14: KS3 academic achievement × Y14: Cognitive ability -.091 -.127 -.056 .000 .000
Y16: GCSE core subjects grade × Y14: Cognitive ability -.068 -.094 -.042 .000 .000
Y21: Highest qualification × Y14: Cognitive ability -.030 -.069 .009 .134 1.000
Y26: Highest qualification × Y14: Cognitive ability -.048 -.086 -.009 .012 1.000
Y12: Depression (MFQ) × Y14: Cognitive ability .002 -.032 .036 .905 1.000
Y16: Depression (MFQ) × Y14: Cognitive ability -.004 -.041 .032 .817 1.000
Y21: Depression (MFQ) × Y14: Cognitive ability .014 -.023 .051 .444 1.000
Y26: Depression (MFQ) × Y14: Cognitive ability -.009 -.051 .031 .660 1.000
Y21: Anxiety (GAD-D) × Y14: Cognitive ability .006 -.038 .047 .790 1.000
Y26: Anxiety (GAD-D) × Y14: Cognitive ability -.007 -.049 .033 .735 1.000
Y12: Externalising × Y14: Cognitive ability .011 -.021 .043 .519 1.000
Y16: Externalising × Y14: Cognitive ability .021 -.012 .053 .205 1.000
Y21: Externalising × Y14: Cognitive ability -.014 -.050 .022 .437 1.000
Y26: Externalising × Y14: Cognitive ability -.036 -.078 .005 .090 1.000
Y21: G-game total score × Y16: Cognitive ability -.184 -.225 -.143 .000 .000
Y14: KS3 academic achievement × Y16: Cognitive ability -.135 -.184 -.086 .000 .000
Y16: GCSE core subjects grade × Y16: Cognitive ability -.102 -.131 -.075 .000 .000
Y21: Highest qualification × Y16: Cognitive ability -.059 -.098 -.021 .005 .607
Y26: Highest qualification × Y16: Cognitive ability -.058 -.099 -.018 .004 .585
Y12: Depression (MFQ) × Y16: Cognitive ability .003 -.035 .041 .854 1.000
Y16: Depression (MFQ) × Y16: Cognitive ability -.002 -.037 .035 .932 1.000
Y21: Depression (MFQ) × Y16: Cognitive ability .020 -.022 .062 .345 1.000
Y26: Depression (MFQ) × Y16: Cognitive ability .016 -.028 .062 .485 1.000
Y21: Anxiety (GAD-D) × Y16: Cognitive ability .009 -.036 .055 .673 1.000
Y26: Anxiety (GAD-D) × Y16: Cognitive ability .016 -.032 .064 .516 1.000
Y12: Externalising × Y16: Cognitive ability .028 -.008 .065 .125 1.000
Y16: Externalising × Y16: Cognitive ability .018 -.020 .055 .362 1.000
Y21: Externalising × Y16: Cognitive ability .010 -.033 .050 .642 1.000
Y26: Externalising × Y16: Cognitive ability -.011 -.056 .034 .613 1.000
Y14: KS3 academic achievement × Y21: G-game total score -.126 -.172 -.078 .000 .000
Y16: GCSE core subjects grade × Y21: G-game total score -.116 -.146 -.086 .000 .000
Y21: Highest qualification × Y21: G-game total score -.061 -.102 -.021 .002 .241
Y26: Highest qualification × Y21: G-game total score -.067 -.109 -.026 .000 .055
Y12: Depression (MFQ) × Y21: G-game total score .005 -.036 .046 .818 1.000
Y16: Depression (MFQ) × Y21: G-game total score .022 -.020 .063 .285 1.000
Y21: Depression (MFQ) × Y21: G-game total score .020 -.019 .059 .309 1.000
Y26: Depression (MFQ) × Y21: G-game total score .011 -.030 .053 .603 1.000
Y21: Anxiety (GAD-D) × Y21: G-game total score .003 -.037 .045 .878 1.000
Y26: Anxiety (GAD-D) × Y21: G-game total score .012 -.032 .055 .586 1.000
Y12: Externalising × Y21: G-game total score .008 -.033 .050 .711 1.000
Y16: Externalising × Y21: G-game total score .020 -.023 .062 .337 1.000
Y21: Externalising × Y21: G-game total score .027 -.017 .068 .224 1.000
Y26: Externalising × Y21: G-game total score -.008 -.049 .035 .704 1.000
Y16: GCSE core subjects grade × Y14: KS3 academic achievement -.074 -.098 -.050 .000 .000
Y21: Highest qualification × Y14: KS3 academic achievement -.052 -.090 -.015 .005 .702
Y26: Highest qualification × Y14: KS3 academic achievement -.046 -.087 -.006 .026 1.000
Y12: Depression (MFQ) × Y14: KS3 academic achievement .017 -.018 .051 .337 1.000
Y16: Depression (MFQ) × Y14: KS3 academic achievement -.019 -.053 .015 .277 1.000
Y21: Depression (MFQ) × Y14: KS3 academic achievement -.024 -.060 .013 .193 1.000
Y26: Depression (MFQ) × Y14: KS3 academic achievement -.003 -.044 .039 .906 1.000
Y21: Anxiety (GAD-D) × Y14: KS3 academic achievement -.043 -.082 -.004 .031 1.000
Y26: Anxiety (GAD-D) × Y14: KS3 academic achievement -.026 -.070 .019 .256 1.000
Y12: Externalising × Y14: KS3 academic achievement .029 -.000 .059 .051 1.000
Y16: Externalising × Y14: KS3 academic achievement .026 -.005 .058 .103 1.000
Y21: Externalising × Y14: KS3 academic achievement -.022 -.059 .013 .226 1.000
Y26: Externalising × Y14: KS3 academic achievement -.015 -.057 .027 .479 1.000
Y21: Highest qualification × Y16: GCSE core subjects grade .010 -.009 .030 .315 1.000
Y26: Highest qualification × Y16: GCSE core subjects grade .004 -.017 .025 .692 1.000
Y12: Depression (MFQ) × Y16: GCSE core subjects grade -.028 -.049 -.007 .007 .877
Y16: Depression (MFQ) × Y16: GCSE core subjects grade -.036 -.060 -.013 .001 .162
Y21: Depression (MFQ) × Y16: GCSE core subjects grade -.030 -.055 -.006 .013 1.000
Y26: Depression (MFQ) × Y16: GCSE core subjects grade -.014 -.040 .012 .299 1.000
Y21: Anxiety (GAD-D) × Y16: GCSE core subjects grade -.031 -.058 -.004 .027 1.000
Y26: Anxiety (GAD-D) × Y16: GCSE core subjects grade -.013 -.041 .016 .385 1.000
Y12: Externalising × Y16: GCSE core subjects grade -.017 -.033 .001 .061 1.000
Y16: Externalising × Y16: GCSE core subjects grade -.018 -.037 .002 .074 1.000
Y21: Externalising × Y16: GCSE core subjects grade -.044 -.070 -.018 .001 .082
Y26: Externalising × Y16: GCSE core subjects grade -.029 -.057 -.000 .050 1.000
Y26: Highest qualification × Y21: Highest qualification .012 -.002 .025 .070 1.000
Y12: Depression (MFQ) × Y21: Highest qualification .004 -.024 .032 .783 1.000
Y16: Depression (MFQ) × Y21: Highest qualification -.020 -.051 .009 .190 1.000
Y21: Depression (MFQ) × Y21: Highest qualification -.017 -.043 .008 .200 1.000
Y26: Depression (MFQ) × Y21: Highest qualification -.009 -.041 .023 .559 1.000
Y21: Anxiety (GAD-D) × Y21: Highest qualification -.018 -.047 .011 .226 1.000
Y26: Anxiety (GAD-D) × Y21: Highest qualification -.011 -.043 .021 .486 1.000
Y12: Externalising × Y21: Highest qualification -.009 -.036 .016 .483 1.000
Y16: Externalising × Y21: Highest qualification -.031 -.059 -.003 .024 1.000
Y21: Externalising × Y21: Highest qualification -.029 -.055 -.003 .033 1.000
Y26: Externalising × Y21: Highest qualification -.023 -.055 .009 .154 1.000
Y12: Depression (MFQ) × Y26: Highest qualification -.006 -.036 .023 .679 1.000
Y16: Depression (MFQ) × Y26: Highest qualification -.019 -.050 .011 .228 1.000
Y21: Depression (MFQ) × Y26: Highest qualification -.010 -.040 .018 .495 1.000
Y26: Depression (MFQ) × Y26: Highest qualification -.003 -.032 .026 .859 1.000
Y21: Anxiety (GAD-D) × Y26: Highest qualification -.014 -.046 .018 .387 1.000
Y26: Anxiety (GAD-D) × Y26: Highest qualification -.003 -.031 .026 .828 1.000
Y12: Externalising × Y26: Highest qualification -.031 -.059 -.004 .026 1.000
Y16: Externalising × Y26: Highest qualification -.018 -.047 .012 .237 1.000
Y21: Externalising × Y26: Highest qualification -.032 -.063 -.001 .044 1.000
Y26: Externalising × Y26: Highest qualification -.014 -.044 .016 .362 1.000
Y16: Depression (MFQ) × Y12: Depression (MFQ) .038 .008 .068 .017 1.000
Y21: Depression (MFQ) × Y12: Depression (MFQ) -.014 -.039 .010 .264 1.000
Y26: Depression (MFQ) × Y12: Depression (MFQ) -.006 -.035 .022 .660 1.000
Y21: Anxiety (GAD-D) × Y12: Depression (MFQ) -.010 -.039 .019 .477 1.000
Y26: Anxiety (GAD-D) × Y12: Depression (MFQ) -.011 -.042 .020 .472 1.000
Y12: Externalising × Y12: Depression (MFQ) .027 .012 .042 .000 .028
Y16: Externalising × Y12: Depression (MFQ) .015 -.009 .039 .216 1.000
Y21: Externalising × Y12: Depression (MFQ) -.016 -.041 .010 .230 1.000
Y26: Externalising × Y12: Depression (MFQ) -.010 -.042 .019 .508 1.000
Y21: Depression (MFQ) × Y16: Depression (MFQ) .007 -.020 .034 .642 1.000
Y26: Depression (MFQ) × Y16: Depression (MFQ) .002 -.028 .032 .900 1.000
Y21: Anxiety (GAD-D) × Y16: Depression (MFQ) .004 -.028 .036 .789 1.000
Y26: Anxiety (GAD-D) × Y16: Depression (MFQ) .011 -.021 .045 .523 1.000
Y12: Externalising × Y16: Depression (MFQ) .034 .009 .058 .005 .655
Y16: Externalising × Y16: Depression (MFQ) .046 .025 .066 .000 .000
Y21: Externalising × Y16: Depression (MFQ) .009 -.017 .037 .515 1.000
Y26: Externalising × Y16: Depression (MFQ) .004 -.027 .036 .786 1.000
Y26: Depression (MFQ) × Y21: Depression (MFQ) -.016 -.040 .006 .157 1.000
Y21: Anxiety (GAD-D) × Y21: Depression (MFQ) .007 -.016 .029 .536 1.000
Y26: Anxiety (GAD-D) × Y21: Depression (MFQ) -.005 -.030 .020 .689 1.000
Y12: Externalising × Y21: Depression (MFQ) .012 -.013 .037 .339 1.000
Y16: Externalising × Y21: Depression (MFQ) .019 -.008 .046 .164 1.000
Y21: Externalising × Y21: Depression (MFQ) .003 -.016 .023 .746 1.000
Y26: Externalising × Y21: Depression (MFQ) -.015 -.043 .013 .286 1.000
Y21: Anxiety (GAD-D) × Y26: Depression (MFQ) -.004 -.030 .023 .751 1.000
Y26: Anxiety (GAD-D) × Y26: Depression (MFQ) -.011 -.025 .003 .131 1.000
Y12: Externalising × Y26: Depression (MFQ) .006 -.022 .033 .685 1.000
Y16: Externalising × Y26: Depression (MFQ) .011 -.018 .040 .437 1.000
Y21: Externalising × Y26: Depression (MFQ) -.014 -.041 .012 .294 1.000
Y26: Externalising × Y26: Depression (MFQ) -.020 -.041 .002 .073 1.000
Y26: Anxiety (GAD-D) × Y21: Anxiety (GAD-D) -.003 -.030 .024 .818 1.000
Y12: Externalising × Y21: Anxiety (GAD-D) .025 -.003 .052 .076 1.000
Y16: Externalising × Y21: Anxiety (GAD-D) .019 -.010 .049 .212 1.000
Y21: Externalising × Y21: Anxiety (GAD-D) .005 -.022 .034 .725 1.000
Y26: Externalising × Y21: Anxiety (GAD-D) -.009 -.038 .021 .562 1.000
Y12: Externalising × Y26: Anxiety (GAD-D) .007 -.022 .036 .655 1.000
Y16: Externalising × Y26: Anxiety (GAD-D) .009 -.022 .042 .571 1.000
Y21: Externalising × Y26: Anxiety (GAD-D) -.007 -.037 .023 .635 1.000
Y26: Externalising × Y26: Anxiety (GAD-D) -.019 -.041 .005 .117 1.000
Y16: Externalising × Y12: Externalising .005 -.014 .024 .605 1.000
Y21: Externalising × Y12: Externalising .003 -.021 .028 .793 1.000
Y26: Externalising × Y12: Externalising -.003 -.032 .025 .808 1.000
Y21: Externalising × Y16: Externalising .005 -.019 .028 .662 1.000
Y26: Externalising × Y16: Externalising .007 -.021 .038 .636 1.000
Y26: Externalising × Y21: Externalising -.011 -.039 .017 .425 1.000
Note. Est = Estimate, LB = Lower Bound 95% Confidence Interval, UB = Upper Bound 95% Confidence Interval. Significant (pBonferroni-Holm) effects are highlighted in green (increases) or red (decreases).
* Correlation difference = Corimputed - Cororiginal
P values are Bonferroni-Holm adjusted

Plot of unimputed correlations

THIS PLOT NEEDS TO BE UPDATED, AS WE DIDN'T INCLUDE ALL PARTICIPANTS IN df IN THE ANALYSES

Code
# Pairwise correlations among the rq5y outcomes in the observed (unimputed)
# data, after applying the three family-level exclusions (panel B1).
df %>%
  filter(
    !(randomfamid %in% exclude_fams_onesib),   # Exclude fams with one sub in the study
    !(randomfamid %in% rq5_exclude_fams),      # Exclude fams with 0 data on rq5y variables (key outcomes)
    !(randomfamid %in% rq5_exclude_fams_2)     # Exclude fams with less than 30% data on all imputed data (excluding baseline data)
  ) %>%
  select(all_of(rq5y)) %>%
  data.frame() %>%
  stats::setNames(rq5y_labels_short) %>%
  gbtoolbox::plot_correlations(
    confidence_interval = FALSE,
    text_size_r         = 2.5,
    abs_colour          = FALSE
  ) +
  # Replaces the fill scale set by plot_correlations(), hence the
  # "Scale for fill is already present" message.
  scale_fill_gradient2(
    low      = "#0571b0",
    mid      = "white",
    high     = "#ca0020",
    midpoint = 0,
    limits   = c(-1, 1) * .9,  # Set min and max here
    na.value = "white"
  ) +
  labs(
    title    = "Unimputed Pairwise correlations",
    subtitle = "Below Diagonal: Correlations. Diagonal: Univariate Sample Sizes. Above Diagonal: Pairwise Sample Sizes",
    caption  = NULL,
    tag      = "B1"
  ) +
  theme(
    plot.title        = element_text(hjust = 0.5, size = 16, face = "plain"),
    plot.subtitle     = element_text(hjust = 0, size = 8, margin = margin(b = 0)),
    plot.caption      = element_text(hjust = 1, size = 8),
    plot.tag          = element_text(hjust = 0, vjust = 0, size = 30, face = "bold"),
    plot.tag.position = "topleft",
    plot.background   = element_blank()
  )
Warning in gbtoolbox::plot_correlations(., confidence_interval = FALSE, : This function is in development, and not yet ready for widespread use. 
  Proceed with caution
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.

Code
# Save the unimputed-correlation figure via the project's save_plot() helper
# (defined outside this section).
save_plot("5_correlations_unimputed", width = 8, height = 8, trim = TRUE)

📊 View Plot

Changes in ACE estimates

Count of significant ACE differences (unadjusted)

Code
# How many imputed-vs-original ACE differences are significant at the
# unadjusted .05 level, and what share of all a/c/e difference tests that is.
bootstrap_summary_df_ace %>%
  filter(par %in% c("a", "c", "e") & group == "Difference") %>%
  summarise(
    total_tests = dplyr::n(),
    sig_unadj   = sum(pval < 0.05),
    pct_sig     = round(sig_unadj / total_tests * 100, 1)
  )

Largest change

Code
# Largest absolute imputed-vs-original difference across the a/c/e estimates.
#
# Written as abs() %>% max() on purpose: with magrittr, `x %>% max(abs(.))`
# still inserts x as the first argument because the dot is only used inside a
# nested call, i.e. it evaluates max(x, abs(x)). That happens to give the same
# number, but only by coincidence.
bootstrap_summary_df_ace %>%
  filter(group == "Difference", par %in% c("a", "c", "e")) %>%
  pull(y) %>%
  abs() %>%
  max()
[1] 0.3832294

Table

Code
# Earlier, simpler version of the table (kept for reference):
# bootstrap_summary_df_ace %>%
#   filter(group == "Difference") %>% 
#   # group_by(par) %>%
#   mutate(
#     pval_adj = stats::p.adjust(pval, method = "holm"),
#     name     = rq5y_labels_short[match(name, rq5y)]
#   ) %>%
#   ungroup() %>%
#   arrange(pval_adj) %>%
#   knitr::kable()


# ACE estimates table: Original vs Imputed estimates and their Difference
# (with interval bounds and p values), one row per outcome x component, split
# into row groups by sex.
bootstrap_summary_df_ace %>%
  filter(par %in% c("a","c","e")) %>%
  # Holm adjustment is applied separately within each `group`
  # (Original / Imputed / Difference), across all outcomes, components and sexes
  group_by(group) %>%
  mutate(
    pval_adj = stats::p.adjust(pval, method = "holm"),
  ) %>%
  ungroup() %>%
  select(-starts_with("."), -n, -pd) %>%
  mutate(name_clean = rq5y_labels_short[match(name, rq5y)]) %>%
  # Wide layout: one column per statistic x group (e.g. y_Original, pval_adj_Difference)
  pivot_wider(
    id_cols = c(par, name_clean, sex),
    names_from = group,
    values_from = c(y, ymin, ymax, pval, pval_adj),
    names_sep = "_"
  ) %>%
  # NOTE(review): this negative select is redundant - the explicit select on the
  # next line already keeps only the listed columns.
  select(-ymin_Original, -ymax_Original, -ymin_Imputed, -ymax_Imputed, -pval_Original, -pval_Imputed, -pval_adj_Original, -pval_adj_Imputed) %>%
  select(name_clean, par, y_Original, y_Imputed, y_Difference, ymin_Difference, ymax_Difference, pval_Difference, pval_adj_Difference, sex) %>%
  arrange(sex, name_clean, par, y_Imputed) %>%
  mutate(par = toupper(par)) %>%
  gt(groupname_col = "sex") %>%
  fmt_number(decimals = 3) %>%
  # Relabel the sex row groups with capitalised names
  tab_row_group(
    label = "Male",
    rows = sex == "male"
  ) %>%
  tab_row_group(
    label = "Female",
    rows = sex == "female"
  ) %>%
  cols_hide(sex) %>%
  cols_label(
    name_clean = "Variable",
    par = "",
    y_Original = "Original",
    y_Imputed = "Imputed",
    y_Difference = "Diff",
    ymin_Difference = "Lower",
    ymax_Difference = "Upper",
    pval_Difference = md("p<sub>unadj</sub>"),
    pval_adj_Difference = md("p<sub>adj</sub>")
  ) %>%
  tab_spanner(
    label = "95% CI",
    columns = c(ymin_Difference, ymax_Difference)
  ) %>%
  # Columns are listed Imputed-first here; the rendered table shows Imputed
  # before Original under this spanner.
  tab_spanner(
    label = "Estimates",
    columns = c(y_Imputed, y_Original)
  ) %>%
  # Shade whole rows whose Holm-adjusted difference p value is below .05
  tab_style(
    style = cell_fill(color = "lightgreen"),
    locations = cells_body(
      columns = everything(),
      rows = pval_adj_Difference < 0.05
    )
  ) %>%
  tab_footnote(
    footnote = "P values are Bonferroni-Holm adjusted within each group (Original, Imputed, Difference)",
    locations = cells_column_labels(columns = pval_adj_Difference)
  )
Variable
Estimates
Diff
95% CI
punadj padj1
Imputed Original Lower Upper
Female
Y12: Cognitive ability A 0.471 0.441 0.030 −0.088 0.150 0.633 1.000
Y12: Cognitive ability C 0.214 0.259 −0.045 −0.157 0.059 0.407 1.000
Y12: Cognitive ability E 0.315 0.300 0.016 −0.015 0.049 0.339 1.000
Y12: Depression (MFQ) A 0.248 0.220 0.028 −0.114 0.180 0.713 1.000
Y12: Depression (MFQ) C 0.255 0.209 0.046 −0.094 0.182 0.491 1.000
Y12: Depression (MFQ) E 0.497 0.571 −0.074 −0.130 −0.023 0.004 0.453
Y12: Externalising A 0.509 0.491 0.018 −0.060 0.087 0.494 1.000
Y12: Externalising C 0.023 0.017 0.006 −0.053 0.074 0.912 1.000
Y12: Externalising E 0.468 0.492 −0.024 −0.057 0.012 0.175 1.000
Y14: Cognitive ability A 0.500 0.471 0.029 −0.184 0.237 0.797 1.000
Y14: Cognitive ability C 0.076 0.153 −0.077 −0.263 0.116 0.429 1.000
Y14: Cognitive ability E 0.424 0.376 0.048 −0.015 0.115 0.139 1.000
Y14: KS3 academic achievement A 0.577 0.442 0.135 0.005 0.264 0.043 1.000
Y14: KS3 academic achievement C 0.327 0.489 −0.162 −0.291 −0.037 0.012 1.000
Y14: KS3 academic achievement E 0.096 0.069 0.027 0.010 0.046 0.004 0.395
Y16: Cognitive ability A 0.221 0.298 −0.077 −0.339 0.191 0.564 1.000
Y16: Cognitive ability C 0.258 0.264 −0.006 −0.230 0.202 0.968 1.000
Y16: Cognitive ability E 0.521 0.438 0.084 −0.008 0.187 0.073 1.000
Y16: Depression (MFQ) A 0.242 0.201 0.041 −0.126 0.207 0.630 1.000
Y16: Depression (MFQ) C 0.252 0.206 0.046 −0.096 0.182 0.524 1.000
Y16: Depression (MFQ) E 0.506 0.593 −0.087 −0.149 −0.024 0.007 0.673
Y16: Externalising A 0.520 0.482 0.038 −0.007 0.081 0.094 1.000
Y16: Externalising C 0.000 0.000 0.000 0.000 0.000 1.000 1.000
Y16: Externalising E 0.479 0.518 −0.038 −0.081 0.007 0.093 1.000
Y16: GCSE core subjects grade A 0.519 0.527 −0.008 −0.061 0.050 0.754 1.000
Y16: GCSE core subjects grade C 0.374 0.364 0.010 −0.047 0.062 0.694 1.000
Y16: GCSE core subjects grade E 0.107 0.109 −0.002 −0.010 0.008 0.603 1.000
Y21: Anxiety (GAD-D) A 0.392 0.375 0.017 −0.070 0.095 0.627 1.000
Y21: Anxiety (GAD-D) C 0.006 0.004 0.002 −0.041 0.062 1.000 1.000
Y21: Anxiety (GAD-D) E 0.602 0.621 −0.019 −0.082 0.048 0.573 1.000
Y21: Depression (MFQ) A 0.198 0.148 0.050 −0.109 0.216 0.561 1.000
Y21: Depression (MFQ) C 0.165 0.182 −0.017 −0.159 0.118 0.824 1.000
Y21: Depression (MFQ) E 0.636 0.669 −0.033 −0.086 0.020 0.222 1.000
Y21: Externalising A 0.334 0.275 0.059 −0.100 0.218 0.419 1.000
Y21: Externalising C 0.065 0.084 −0.018 −0.149 0.109 0.782 1.000
Y21: Externalising E 0.601 0.641 −0.040 −0.097 0.019 0.188 1.000
Y21: G-game total score A 0.557 0.482 0.075 −0.156 0.287 0.510 1.000
Y21: G-game total score C 0.101 0.255 −0.154 −0.349 0.057 0.146 1.000
Y21: G-game total score E 0.342 0.263 0.079 0.020 0.146 0.007 0.687
Y21: Highest qualification A 0.352 0.344 0.009 −0.138 0.155 0.912 1.000
Y21: Highest qualification C 0.314 0.310 0.004 −0.125 0.134 0.938 1.000
Y21: Highest qualification E 0.333 0.347 −0.013 −0.058 0.032 0.559 1.000
Y26: Anxiety (GAD-D) A 0.372 0.393 −0.021 −0.191 0.116 0.852 1.000
Y26: Anxiety (GAD-D) C 0.051 0.020 0.030 −0.078 0.171 0.664 1.000
Y26: Anxiety (GAD-D) E 0.577 0.587 −0.010 −0.076 0.059 0.781 1.000
Y26: Depression (MFQ) A 0.335 0.391 −0.056 −0.230 0.105 0.510 1.000
Y26: Depression (MFQ) C 0.096 0.047 0.050 −0.084 0.194 0.484 1.000
Y26: Depression (MFQ) E 0.569 0.563 0.006 −0.054 0.068 0.857 1.000
Y26: Externalising A 0.405 0.402 0.003 −0.107 0.093 0.871 1.000
Y26: Externalising C 0.014 0.008 0.007 −0.061 0.097 0.990 1.000
Y26: Externalising E 0.581 0.591 −0.010 −0.069 0.050 0.740 1.000
Y26: Highest qualification A 0.294 0.267 0.027 −0.139 0.199 0.770 1.000
Y26: Highest qualification C 0.362 0.363 −0.002 −0.157 0.146 0.996 1.000
Y26: Highest qualification E 0.344 0.369 −0.025 −0.077 0.027 0.338 1.000
Male
Y12: Cognitive ability A 0.483 0.401 0.082 −0.076 0.252 0.315 1.000
Y12: Cognitive ability C 0.227 0.347 −0.120 −0.278 0.020 0.097 1.000
Y12: Cognitive ability E 0.290 0.252 0.039 0.001 0.079 0.044 1.000
Y12: Depression (MFQ) A 0.393 0.383 0.009 −0.085 0.089 0.657 1.000
Y12: Depression (MFQ) C 0.030 0.022 0.008 −0.060 0.093 0.944 1.000
Y12: Depression (MFQ) E 0.578 0.595 −0.017 −0.055 0.020 0.356 1.000
Y12: Externalising A 0.539 0.534 0.006 −0.053 0.079 0.889 1.000
Y12: Externalising C 0.021 0.027 −0.006 −0.070 0.044 0.918 1.000
Y12: Externalising E 0.440 0.439 0.000 −0.023 0.026 0.986 1.000
Y14: Cognitive ability A 0.332 0.433 −0.101 −0.396 0.172 0.500 1.000
Y14: Cognitive ability C 0.106 0.161 −0.056 −0.280 0.181 0.639 1.000
Y14: Cognitive ability E 0.562 0.406 0.157 0.059 0.265 0.001 0.085
Y14: KS3 academic achievement A 0.450 0.452 −0.002 −0.155 0.153 0.966 1.000
Y14: KS3 academic achievement C 0.376 0.440 −0.064 −0.205 0.069 0.354 1.000
Y14: KS3 academic achievement E 0.174 0.108 0.066 0.029 0.114 0.000 0.043
Y16: Cognitive ability A 0.461 0.570 −0.109 −0.360 0.165 0.384 1.000
Y16: Cognitive ability C 0.032 0.080 −0.048 −0.274 0.157 0.682 1.000
Y16: Cognitive ability E 0.507 0.350 0.157 0.052 0.275 0.004 0.378
Y16: Depression (MFQ) A 0.374 0.308 0.066 −0.078 0.247 0.326 1.000
Y16: Depression (MFQ) C 0.016 0.031 −0.015 −0.153 0.090 0.860 1.000
Y16: Depression (MFQ) E 0.610 0.661 −0.051 −0.147 0.045 0.284 1.000
Y16: Externalising A 0.414 0.424 −0.010 −0.127 0.066 0.915 1.000
Y16: Externalising C 0.013 0.002 0.011 0.000 0.099 0.878 1.000
Y16: Externalising E 0.573 0.574 0.000 −0.063 0.067 0.972 1.000
Y16: GCSE core subjects grade A 0.649 0.633 0.015 −0.062 0.101 0.734 1.000
Y16: GCSE core subjects grade C 0.216 0.235 −0.019 −0.102 0.056 0.663 1.000
Y16: GCSE core subjects grade E 0.136 0.132 0.003 −0.008 0.017 0.607 1.000
Y21: Anxiety (GAD-D) A 0.170 0.235 −0.065 −0.258 0.149 0.443 1.000
Y21: Anxiety (GAD-D) C 0.025 0.024 0.002 −0.156 0.147 1.000 1.000
Y21: Anxiety (GAD-D) E 0.805 0.742 0.063 −0.063 0.187 0.309 1.000
Y21: Depression (MFQ) A 0.185 0.213 −0.028 −0.264 0.204 0.786 1.000
Y21: Depression (MFQ) C 0.064 0.065 −0.001 −0.184 0.174 1.000 1.000
Y21: Depression (MFQ) E 0.751 0.722 0.030 −0.079 0.167 0.646 1.000
Y21: Externalising A 0.233 0.306 −0.073 −0.290 0.114 0.406 1.000
Y21: Externalising C 0.049 0.022 0.027 −0.123 0.194 0.761 1.000
Y21: Externalising E 0.718 0.671 0.046 −0.056 0.153 0.393 1.000
Y21: G-game total score A 0.424 0.310 0.114 −0.259 0.433 0.497 1.000
Y21: G-game total score C 0.021 0.404 −0.383 −0.646 −0.064 0.016 1.000
Y21: G-game total score E 0.555 0.286 0.269 0.129 0.432 0.000 0.000
Y21: Highest qualification A 0.547 0.564 −0.017 −0.244 0.226 0.851 1.000
Y21: Highest qualification C 0.090 0.126 −0.036 −0.245 0.155 0.759 1.000
Y21: Highest qualification E 0.363 0.310 0.053 −0.024 0.133 0.181 1.000
Y26: Anxiety (GAD-D) A 0.236 0.292 −0.056 −0.281 0.200 0.554 1.000
Y26: Anxiety (GAD-D) C 0.021 0.027 −0.006 −0.198 0.141 1.000 1.000
Y26: Anxiety (GAD-D) E 0.742 0.680 0.062 −0.093 0.224 0.441 1.000
Y26: Depression (MFQ) A 0.200 0.260 −0.060 −0.294 0.214 0.556 1.000
Y26: Depression (MFQ) C 0.033 0.067 −0.034 −0.256 0.135 0.861 1.000
Y26: Depression (MFQ) E 0.768 0.673 0.094 −0.033 0.227 0.146 1.000
Y26: Externalising A 0.185 0.223 −0.038 −0.335 0.267 0.781 1.000
Y26: Externalising C 0.074 0.110 −0.036 −0.279 0.191 0.816 1.000
Y26: Externalising E 0.741 0.667 0.073 −0.063 0.213 0.294 1.000
Y26: Highest qualification A 0.588 0.440 0.148 −0.142 0.463 0.363 1.000
Y26: Highest qualification C 0.052 0.231 −0.179 −0.461 0.072 0.195 1.000
Y26: Highest qualification E 0.361 0.329 0.032 −0.060 0.124 0.495 1.000
1 P values are Bonferroni-Holm adjusted within each group (Original, Imputed, Difference)

Plot

Code
# Stacked bars of the A/C/E proportions for each outcome, comparing the imputed
# (I) and original (O) estimates, faceted by sex (rows) and outcome (columns).
bootstrap_summary_df_ace %>%
  filter(
    par %in% c("a","c","e"),
    group != "Difference"
  ) %>%
  mutate(
    # Short labels, kept in the rq5y_labels_short order for the facets
    name  = factor(rq5y_labels_short[match(name, rq5y)], levels = rq5y_labels_short),
    # Single-letter group codes for the x axis
    group = factor(ifelse(group == "Imputed", "I", "O"), levels = c("I", "O")),
    sex   = str_to_title(sex),
    par   = toupper(par)
  ) %>%
  ggplot(aes(x = group, y = y, fill = par)) +
  geom_col(position = "stack", alpha = 1) +
  facet_grid(sex ~ name, switch = "x") +
  scale_fill_manual(values = c("A" = "#d73027", "C" = "#fee08b", "E" = "#4575b4")) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(
    title = "Imputed (I) or Original (Non-Imputed; O) ACE Estimates",
    y     = "Proportion",
    x     = NULL,
    fill  = "Component",
    tag   = "B"
  ) +
  theme_minimal() +
  theme(
    axis.text.x       = element_text(angle = 0, hjust = .5, size = 10),
    strip.text        = element_text(size = 10),
    strip.text.x      = element_text(angle = 90, hjust = 1, vjust = .5, size = 10),
    strip.text.y      = element_text(angle = 0, size = 11),
    strip.placement   = "outside",
    panel.grid        = element_blank(),
    plot.tag          = element_text(hjust = 0, vjust = 0, size = 24, face = "bold"),
    plot.tag.position = "topleft"
  )

Code
# Save the ACE comparison figure via the project's save_plot() helper
# (defined outside this section).
save_plot("5_ace_comparison", width = 8, height = 5, trim = TRUE)

Supplementary Tables & Plots

Comparison of sample size pre- and post-imputation

Code
# Left panel: correlations and sample sizes in the original (non-imputed) data.
# NOTE(review): this uses `df` without the family-level exclusions that the
# unimputed correlation plot applies (exclude_fams_onesib, rq5_exclude_fams,
# rq5_exclude_fams_2), so its sample sizes may not match the analysis sample -
# confirm this is intended.
p1 = df %>%
  select(all_of(rq5y)) %>%
  stats::setNames(rq5y_labels_short) %>%
  # select(rq5y) %>%
  # `colnames<-`(rq5y_labels_short) %>%
  as.data.frame() %>% # Note that this is needed for function to work - could improve? 
  gbtoolbox::plot_correlations(
    confidence_interval = FALSE,
    sample_size         = TRUE,
    text_size_r         = 1.5,
    text_size_axis      = 8.5
  ) +
  labs(title = "Original (non-imputed data)")
Warning in gbtoolbox::plot_correlations(., confidence_interval = FALSE, : This function is in development, and not yet ready for widespread use. 
  Proceed with caution
Code
# Right panel: the same correlation / sample-size plot for the first imputed
# dataset (.imp == 1).
p2 = df_rq5_imputed %>%
  filter(.imp == 1) %>%
  select(all_of(rq5y)) %>%
  stats::setNames(rq5y_labels_short) %>%
  # select(rq5y) %>%
  # `colnames<-`(rq5y_labels_short) %>%
  as.data.frame() %>% # Note that this is needed for function to work - could improve? 
  gbtoolbox::plot_correlations(
    confidence_interval = FALSE,
    sample_size         = TRUE,
    text_size_r         = 1.5,
    text_size_axis      = 8.5
  ) +
  labs(title = "Imputed data")
Warning in gbtoolbox::plot_correlations(., confidence_interval = FALSE, : This function is in development, and not yet ready for widespread use. 
  Proceed with caution
Code
# Show the original and imputed panels together (combining two plots with `+`
# presumably relies on patchwork being loaded - TODO confirm).
p1 + p2

Code
save_plot("5_3_correlation_comparison", width = 5.3*2, height = 5.3)

# Number of non-missing values in every column of the first imputed dataset
# (the printed counts all equal the number of rows when nothing is missing).
df_rq5_imputed %>%
  filter(.imp == 1) %>%
  vapply(function(column) sum(!is.na(column)), integer(1))
             .imp            sexzyg       randomfamid           x3zygos 
            12976             12976             12976             12976 
           random              lcg1              ncg1              pcg1 
            12976             12976             12976             12976 
            ucgt1        npks3tall1 pcexgcsecoregrdm1        u1chqualp1 
            12976             12976             12976             12976 
        zmhhqual1           lcmfqt1         pcbhmfqt1          u1cmfqt1 
            12976             12976             12976             12976 
         zmhmfqt1         u2cganxt1         zmhganxt1          lsdqext1 
            12976             12976             12976             12976 
         psdqext1          usdqext1          zsdqext1     randomtwinid2 
            12976             12976             12976             12976 
Code
# df_rq5_imputed$ncg1

Check for convergence of algorithm

Code
# Number of elements stored in the imputed_mice list
length(imputed_mice)
[1] 1000
Code
# Diagnostic plot for one stored imputation object, drawn on a 4 x 6 panel
# layout. NOTE(review): only element 4 is inspected; the index is hard-coded.
plot(imputed_mice[[4]], layout = c(4,6))

Code
# plot(imputed_mice[[3]], y = rq5y, layout = c(4,6))

# Labels of the rq5y outcomes that are not columns of the data stored in the
# first imputation object.
rq5y_labels_short[is.na(match(rq5y, colnames(imputed_mice[[1]]$data)))]
[1] "Y12: Externalising" "Y16: Externalising" "Y21: Externalising"
[4] "Y26: Externalising"

Table of all variables for imputation

Code
# Look up the study wave for a variable from the first character of its name.
# Works on a vector of names; the result is named by the extracted prefix, and
# a prefix with no entry in the lookup gives NA.
get_study_wave = function(var_name) {
  wave_codes = c(
    "a", "b", "c", "d", "e",
    "g", "h", "i", "j", "l",
    "n", "p", "r", "u", "z"
  )
  wave_labels = c(
    "1st Contact", "2 Year", "3 Year", "4 Year", "In Home",
    "7 Year", "8 Year", "9 Year", "10 Year", "12 Year",
    "14 Year", "16 Year", "18 Year", "21 Year", "26 Year"
  )
  wave_lookup = stats::setNames(wave_labels, wave_codes)

  first_letter = substring(var_name, 1, 1)
  wave_lookup[first_letter]
}

impute_vars = readRDS(file.path("results","5_1_imputation_variables.Rds"))
impute_vars_labels = var_to_label(impute_vars) %>% sapply(., function(x) ifelse(is.null(x[1]), "", x[1]))

# Build the table of imputation variables, one row per entry of `impute_vars`.
#   Description    - label taken from `impute_vars_labels`
#   Teds Code      - variable name, with "*" appended when the name is not in
#                    `original_colnames`
#   Range or Level - "min — max" (rounded to 2 dp) for numeric columns of `df`,
#                    the factor levels with the first level starred for
#                    factors, otherwise the distinct values; "Variable not
#                    found" when `df` has no such column
#   N              - count of non-missing values in `df` (0 if column absent)
#   Study.Wave     - wave label from get_study_wave()
# data.frame() is left at its default check.names = TRUE, so the backticked
# names are stored as `Teds.Code` / `Range.or.Level`; later code relies on that.
v_impute <- data.frame(
  Description = impute_vars_labels,
  `Teds Code` = ifelse(
    impute_vars %in% original_colnames,
    impute_vars,
    paste0(impute_vars, "*")
  ),
  `Range or Level` = vapply(
    impute_vars,
    function(var) {
      if (!(var %in% colnames(df))) {
        return("Variable not found")
      }
      column <- df[[var]]
      # is.numeric() covers integer as well as double columns and, unlike
      # `class(x) == "numeric"`, stays a single TRUE/FALSE for columns that
      # carry more than one class.
      if (is.numeric(column)) {
        paste0(
          round(min(column, na.rm = TRUE), 2),
          " — ",
          round(max(column, na.rm = TRUE), 2)
        )
      } else if (is.factor(column)) {
        factor_levels <- levels(column)
        paste(c(paste0(factor_levels[1], "*"), factor_levels[-1]), collapse = ", ")
      } else {
        paste(unique(column), collapse = ", ")
      }
    },
    character(1)
  ),
  N = vapply(
    impute_vars,
    function(var) {
      if (var %in% colnames(df)) {
        sum(!is.na(df[[var]]))
      } else {
        0L
      }
    },
    integer(1)
  ),
  `Study.Wave` = vapply(impute_vars, get_study_wave, character(1))
)

# Clean up descriptions
# v_impute$Description = str_remove(v_impute$Description, "\\(1st.*")
# v_impute$Description = str_remove(v_impute$Description, "\\(2.*")

# Manual overrides for variables whose first letter is not a wave prefix.
# get_study_wave() looks only at the first character, so "cohort" and
# "cens01pop98density" would be labelled "3 Year" and "pollution1998pca"
# "16 Year". These are assigned to "1st Contact" instead.
# NOTE(review): "cohort" was not overridden before and rendered as "3 Year";
# "1st Contact" mirrors the treatment of the other two — confirm it is the
# intended wave.
# The trailing "*" (added to codes of derived variables) is stripped before
# matching so an override still applies if a code carries that marker.
v_impute$Study.Wave[
  sub("\\*$", "", v_impute$Teds.Code) %in%
    c("cohort", "cens01pop98density", "pollution1998pca")
] <- "1st Contact"

# Add row group information
# v_impute_indexed = cbind(row_group = "Imputation Variables", row_id = 1:nrow(v_impute), v_impute)

# Render the imputation-variable table with gt:
#   - dots in column names are shown as spaces in the column labels
#   - body, column labels and row-group labels all use 12px text
#   - the N column is hidden
#   - a source note explains the starred codes / reference levels
# Row-group variant (disabled):
#   tab_row_group(label = "Imputation Variables",
#                 rows = row_group == "Imputation Variables") %>%
#   cols_hide(c(row_group, row_id)) %>%
v_impute %>%
  gt() %>%
  cols_label_with(fn = ~ gsub("\\.", " ", .x)) %>%
  tab_style(
    style = cell_text(size = px(12)),
    locations = list(
      cells_body(),
      cells_column_labels(),
      cells_row_groups()
    )
  ) %>%
  cols_hide(N) %>%
  tab_source_note(
    source_note = "Note: Externalising SDQ scores are created post-imputation by adding conduct and hyperactivity problem scales. Range or Level shows min—max values for numeric variables or factor levels for categorical variables (reference level marked with *). Variable codes with an asterisk (*) have been derived or modified from the original dataset."
  ) %>%
  tab_options(table.width = "70%") %>%
  cols_width(
    Description ~ px(300),
    everything() ~ px(120)
  )
Description Teds Code Range or Level Study Wave
School cohort, see value labels cohort Cohort 1: twins born Jan-94 to Aug-94*, Cohort 2: twins born Sep-94 to Aug-95, Cohort 3: twins born Sep-95 to Aug-96, Cohort 4: twins born Sep-96 to Dec-96 3 Year
Age in years of natural mother at time of birth of twins amumagetw 14 — 45 1st Contact
Single Parent asingle* cohabiting biological mother and father / cohabiting biological parent with other*, single parent 1st Contact
Mother SOC employment level (1st Contact), 1-9 amosoc2* caring for children at home*, 1, 2, 3, 4, 5, 6, 7, 8, 9, no job 1st Contact
Maternal Education (formatted as numeric) amohqualn* 1 — 8 1st Contact
Ethnic origin of twins, simplified coding (1st Contact), 1=white 0=other aethnic 0 — 1 1st Contact
Main language spoken at home (1st Contact), see value labels alang other*, English, English + other 1st Contact
Number of older siblings (formatted as numeric variable) anoldsibn* 0 — 5 1st Contact
Number of younger siblings (formatted as numeric variable) anyngsibn* 0 — 2 1st Contact
Member of a Twins Club (1st Contact), 1Y 0N atwclub 0*, 1 1st Contact
Census data 2001 (code KS001) linked to 1998 postcode: population density, N per hectare cens01pop98density 0 — 310 1st Contact
Smoked cigarettes while pregnant (1st Contact), 1Y 0N asmoke 0*, 1 1st Contact
Non-verbal cognitive ability composite (4 Year), standardised dscnv1 -4.6 — 3.08 4 Year
Non-verbal cognitive ability composite (4 Year), standardised dscnv2 -4.6 — 3.08 4 Year
Verbal cognitive ability composite (4 Year), standardised dscv1 -3.13 — 1.49 4 Year
Verbal cognitive ability composite (4 Year), standardised dscv2 -3.13 — 1.49 4 Year
SDQ Conduct scale (4 Year), 0-10 dsdqcont1 0 — 10 4 Year
SDQ Conduct scale (4 Year), 0-10 dsdqcont2 0 — 10 4 Year
SDQ Hyperactivity scale (4 Year), 0-10 dsdqhypt1 0 — 10 4 Year
SDQ Hyperactivity scale (4 Year), 0-10 dsdqhypt2 0 — 10 4 Year
ARBQ total scale (4 Year), 0-36 danxt1 0 — 33 4 Year
ARBQ total scale (4 Year), 0-36 danxt2 0 — 34 4 Year
Chaos overall scale (4 Year), standardised dchatot -2.24 — 4.85 4 Year
Child Verbal/Language composite (7 year twin phone), standardised gcl1 -3.04 — 5.9 7 Year
Child Verbal/Language composite (7 year twin phone), standardised gcl2 -3.04 — 5.9 7 Year
Child Non-verbal composite (7 year twin phone), standardised gcn1 -4.32 — 3.15 7 Year
Child Non-verbal composite (7 year twin phone), standardised gcn2 -4.32 — 3.15 7 Year
2-subject (maths, English) mean NC level (7 year teacher), 0-4 gt2ac1 0 — 4 7 Year
2-subject (maths, English) mean NC level (7 year teacher), 0-4 gt2ac2 0 — 4 7 Year
SDQ hyperactivity total (7 year parent), 0-10 gpsdqhypt1 0 — 10 7 Year
SDQ hyperactivity total (7 year parent), 0-10 gpsdqhypt2 0 — 10 7 Year
SDQ conduct total (7 year parent), 0-10 gpsdqcont1 0 — 10 7 Year
SDQ conduct total (7 year parent), 0-10 gpsdqcont2 0 — 10 7 Year
ARBQ overall anxiety total (7 year parent), 0-52 gpanxt1 0 — 46 7 Year
ARBQ overall anxiety total (7 year parent), 0-52 gpanxt2 0 — 46 7 Year
Conners ADHD overall Total at 8 (0-54) hconnt1 0 — 54 8 Year
Conners ADHD overall Total at 8 (0-54) hconnt2 0 — 54 8 Year
Verbal composite (9 year child), standardised icvb1 -4.58 — 2.61 9 Year
Verbal composite (9 year child), standardised icvb2 -4.58 — 2.61 9 Year
Non-Verbal composite (9 year child), standardised icnv1 -3.68 — 1.31 9 Year
Non-Verbal composite (9 year child), standardised icnv2 -3.68 — 1.31 9 Year
3-subject (English, maths, science) mean NC level (9 year teacher), 1-5 it3ac1 1 — 5 9 Year
SDQ Conduct scale (child self-report) at 9, 0-10 icsdqcont1 0 — 10 9 Year
SDQ Conduct scale (child self-report) at 9, 0-10 icsdqcont2 0 — 10 9 Year
SDQ Hyperactivity scale (child self-report) at 9, 0-10 icsdqhypt1 0 — 10 9 Year
SDQ Hyperactivity scale (child self-report) at 9, 0-10 icsdqhypt2 0 — 10 9 Year
SDQ Hyperactivity scale (parent) at 9, 0-10 ipsdqhypt1 0 — 10 9 Year
SDQ Hyperactivity scale (parent) at 9, 0-10 ipsdqhypt2 0 — 10 9 Year
SDQ Conduct scale (parent) at 9, 0-10 ipsdqcont1 0 — 10 9 Year
SDQ Conduct scale (parent) at 9, 0-10 ipsdqcont2 0 — 10 9 Year
Negative parental feelings scale, child-self-rated at 9, 0-8 icparnegt1 0 — 8 9 Year
Negative parental feelings scale, child-self-rated at 9, 0-8 icparnegt2 0 — 8 9 Year
ARBQ overall anxiety total (9 year parent), 0-50 ipanxt1 0 — 42 9 Year
ARBQ overall anxiety total (9 year parent), 0-50 ipanxt2 0 — 42 9 Year
Parent Chaos scale at 9, 0-12 ipchatot 0 — 12 9 Year
Child Non-verbal composite (10 year twin web), standardised jcnv1 -5.2 — 2.45 10 Year
Child Non-verbal composite (10 year twin web), standardised jcnv2 -5.2 — 2.45 10 Year
Child Verbal composite (10 year twin web), standardised jcvb1 -4.14 — 3.07 10 Year
Child Verbal composite (10 year twin web), standardised jcvb2 -4.14 — 3.07 10 Year
3-subject (English, maths, science) mean NC level (10 year teacher), 1-5 jt3ac1 1 — 5 10 Year
3-subject (English, maths, science) overall mean NC level from parent-reported school report at 12, 2-7 lp3ac1 2 — 7 12 Year
3-subject (English, maths, science) overall mean NC level from parent-reported school report at 12, 2-7 lp3ac2 2 — 7 12 Year
SDQ Hyperactivity scale (child self-report) at 12, 0-10 lcsdqhypt1 0 — 10 12 Year
SDQ Hyperactivity scale (child self-report) at 12, 0-10 lcsdqhypt2 0 — 10 12 Year
SDQ Conduct scale (child self-report) at 12, 0-10 lcsdqcont1 0 — 10 12 Year
SDQ Conduct scale (child self-report) at 12, 0-10 lcsdqcont2 0 — 10 12 Year
SDQ Hyperactivity scale (parent) at 12, 0-10 lpsdqhypt1 0 — 10 12 Year
SDQ Hyperactivity scale (parent) at 12, 0-10 lpsdqhypt2 0 — 10 12 Year
SDQ Conduct scale (parent) at 12, 0-10 lpsdqcont1 0 — 10 12 Year
SDQ Conduct scale (parent) at 12, 0-10 lpsdqcont2 0 — 10 12 Year
MFQ scale from 11 MFQ items (child self-report) at 12, 0-22 lcmfqt1 0 — 22 12 Year
MFQ scale from 11 MFQ items (child self-report) at 12, 0-22 lcmfqt2 0 — 22 12 Year
MFQ scale from 11 MFQ items (parent) at 12, 0-22 lpmfqt1 0 — 22 12 Year
MFQ scale from 11 MFQ items (parent) at 12, 0-22 lpmfqt2 0 — 22 12 Year
Conners ADHD overall scale (parent) at 12, 0-54 lpconnt1 0 — 54 12 Year
Conners ADHD overall scale (parent) at 12, 0-54 lpconnt2 0 — 54 12 Year
Parental Feelings negative subscale (child self-reported) at 12, 0-8 lcparnegt1 0 — 8 12 Year
Parental Feelings negative subscale (child self-reported) at 12, 0-8 lcparnegt2 0 — 8 12 Year
Chaos scale (parent-reported, from 5 items) at 12, 0-10 lpchatot 0 — 10 12 Year
Chaos scale (child self-reported, from 6 items) at 12, 0-12 lcchato1 0 — 12 12 Year
Chaos scale (child self-reported, from 6 items) at 12, 0-12 lcchato2 0 — 12 12 Year
End of KS3 3-subject Academic achievement mean level (from parent SLQ), 1-9 npks3t3a1 1 — 9 14 Year
End of KS3 3-subject Academic achievement mean level (from parent SLQ), 1-9 npks3t3a2 1 — 9 14 Year
Conners Inattention scale at 14 (child), 0-27 ncconint1 0 — 27 14 Year
Conners Inattention scale at 14 (child), 0-27 ncconint2 0 — 27 14 Year
Conners Hyperactivity-Impulsivity scale at 14 (child), 0-27 ncconhit1 0 — 27 14 Year
Conners Hyperactivity-Impulsivity scale at 14 (child), 0-27 ncconhit2 0 — 27 14 Year
Conners total scale at 14 (parent), 0-54 npconnt1 0 — 54 14 Year
Conners total scale at 14 (parent), 0-54 npconnt2 0 — 54 14 Year
Negative Parental Feelings scale at 14 (child), 0-8 ncparnegt1 0 — 8 14 Year
Negative Parental Feelings scale at 14 (child), 0-8 ncparnegt2 0 — 8 14 Year
Chaos at home total scale at 14 (child), 0-12 ncchato1 0 — 11 14 Year
Chaos at home total scale at 14 (child), 0-12 ncchato2 0 — 11 14 Year
SDQ Conduct scale (child behaviour qnr at 16), 0-10 pcbhsdqcont1 0 — 10 16 Year
SDQ Conduct scale (child behaviour qnr at 16), 0-10 pcbhsdqcont2 0 — 10 16 Year
SDQ Hyperactivity scale (child behaviour qnr at 16), 0-10 pcbhsdqhypt1 0 — 10 16 Year
SDQ Hyperactivity scale (child behaviour qnr at 16), 0-10 pcbhsdqhypt2 0 — 10 16 Year
SDQ Conduct scale (parent behaviour qnr at 16), 0-10 ppbhsdqcont1 0 — 10 16 Year
SDQ Conduct scale (parent behaviour qnr at 16), 0-10 ppbhsdqcont2 0 — 10 16 Year
SDQ Hyperactivity scale (parent behaviour qnr at 16), 0-10 ppbhsdqhypt1 0 — 10 16 Year
SDQ Hyperactivity scale (parent behaviour qnr at 16), 0-10 ppbhsdqhypt2 0 — 10 16 Year
ARBQ Anxiety overall total scale (parent behaviour qnr at 16), 0-38 ppbhanxt1 0 — 33.78 16 Year
ARBQ Anxiety overall total scale (parent behaviour qnr at 16), 0-38 ppbhanxt2 0 — 33.78 16 Year
Chaos total score (child web at 16), 0-12 pcchatot1 0 — 12 16 Year
Chaos total score (child web at 16), 0-12 pcchatot2 0 — 12 16 Year
SDQ Conduct total score (TEDS21 phase 1 twin qnr), 0-10 u1csdqcont1 0 — 9 21 Year
SDQ Conduct total score (TEDS21 phase 1 twin qnr), 0-10 u1csdqcont2 0 — 9 21 Year
SDQ Hyperactivity total score (TEDS21 phase 1 twin qnr), 0-10 u1csdqhypt1 0 — 10 21 Year
SDQ Hyperactivity total score (TEDS21 phase 1 twin qnr), 0-10 u1csdqhypt2 0 — 10 21 Year
SDQ Conduct total score (TEDS26 twin MHQ), 0-10 zmhsdqcont1 0 — 9 26 Year
SDQ Conduct total score (TEDS26 twin MHQ), 0-10 zmhsdqcont2 0 — 9 26 Year
SDQ Hyperactivity total score (TEDS26 twin MHQ), 0-10 zmhsdqhypt1 0 — 10 26 Year
SDQ Hyperactivity total score (TEDS26 twin MHQ), 0-10 zmhsdqhypt2 0 — 10 26 Year
G composite scale from child web tests at 12, standardised lcg1 -3.67 — 3.04 12 Year
G composite scale from child web tests at 12, standardised lcg2 -3.67 — 3.04 12 Year
G composite scale from child web tests at 14, standardised ncg1 -4.12 — 3.17 14 Year
G composite scale from child web tests at 14, standardised ncg2 -4.12 — 3.17 14 Year
G composite scale from child web tests at 16, standardised pcg1 -2.86 — 4.06 16 Year
G composite scale from child web tests at 16, standardised pcg2 -2.86 — 4.06 16 Year
G-game overall total score, 0-40 ucgt1 3 — 40 21 Year
G-game overall total score, 0-40 ucgt2 3 — 40 21 Year
End of KS3 all-subject Academic achievement mean level (from parent SLQ), 1-9 npks3tall1 1 — 9 14 Year
End of KS3 all-subject Academic achievement mean level (from parent SLQ), 1-9 npks3tall2 1 — 9 14 Year
Core subjects (English, maths, science): mean grade in GCSE results (twin exams at 16), 4-11 pcexgcsecoregrdm1 4 — 11 16 Year
Core subjects (English, maths, science): mean grade in GCSE results (twin exams at 16), 4-11 pcexgcsecoregrdm2 4 — 11 16 Year
Twin probable highest level of qualification including current study (TEDS21 phase 1 twin qnr), 1-11 see value labels u1chqualp1 1 — 11 21 Year
Twin probable highest level of qualification including current study (TEDS21 phase 1 twin qnr), 1-11 see value labels u1chqualp2 1 — 11 21 Year
Demographics item: highest qualification ordinal level (TEDS26 twin MHQ), see value labels zmhhqual1 1 — 11 26 Year
Demographics item: highest qualification ordinal level (TEDS26 twin MHQ), see value labels zmhhqual2 1 — 11 26 Year
MFQ total scale (child behaviour qnr at 16), 0-26 pcbhmfqt1 0 — 26 16 Year
MFQ total scale (child behaviour qnr at 16), 0-26 pcbhmfqt2 0 — 26 16 Year
MFQ overall total score (TEDS21 phase 1 twin qnr), 0-16 u1cmfqt1 0 — 16 21 Year
MFQ overall total score (TEDS21 phase 1 twin qnr), 0-16 u1cmfqt2 0 — 16 21 Year
MFQ overall total score (TEDS26 twin MHQ), 0-26 zmhmfqt1 0 — 26 26 Year
MFQ overall total score (TEDS26 twin MHQ), 0-26 zmhmfqt2 0 — 26 26 Year
General Anxiety overall total score (TEDS21 phase 2 twin qnr), 0-40 u2cganxt1 0 — 40 21 Year
General Anxiety overall total score (TEDS21 phase 2 twin qnr), 0-40 u2cganxt2 0 — 40 21 Year
GAD-D (General Anxiety) overall total score (TEDS26 twin MHQ), 0-40 zmhganxt1 0 — 40 26 Year
GAD-D (General Anxiety) overall total score (TEDS26 twin MHQ), 0-40 zmhganxt2 0 — 40 26 Year
Note: Externalising SDQ scores are created post-imputation by adding conduct and hyperactivity problem scales. Range or Level shows min—max values for numeric variables or factor levels for categorical variables (reference level marked with *). Variable codes with an asterisk (*) have been derived or modified from the original dataset.